# --- Core libraries and the regression-model zoo used for comparison ---
import os
import time
import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
# Show every column when displaying wide DataFrames in the notebook.
pd.set_option('display.max_columns', None)
# Chart drawing
import plotly as py
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Mute sklearn warnings
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=DeprecationWarning)
# Show charts when running kernel
#init_notebook_mode(connected=True)
# Change default background color for all visualizations
layout=go.Layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(250,250,250,0.8)')
fig = go.Figure(layout=layout)
# Register the custom background layout as the default plotly template.
templated_fig = pio.to_templated(fig)
pio.templates['my_template'] = templated_fig.layout.template
pio.templates.default = 'my_template'
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="xgboost")
# NOTE(review): this blanket filter hides ALL warnings, not just xgboost's.
warnings.filterwarnings("ignore")
def evaluate_regression_model(y_true, y_pred):
    """
    Calculate, print, and return evaluation metrics for a regression model.

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.

    Returns:
    - Dictionary with keys 'MSE', 'RMSE', 'MAE', 'R2'.
    """
    # Calculate evaluation metrics
    mse = mean_squared_error(y_true, y_pred)
    # Derive RMSE from MSE directly: the `squared=False` argument was
    # deprecated in scikit-learn 1.4 and removed in 1.6, and this also
    # avoids computing the MSE a second time.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # Print the results
    print(f'Mean Squared Error (MSE): {np.round(mse,3)}')
    print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
    print(f'Mean Absolute Error (MAE): {np.round(mae,3)}')
    print(f'R-squared (R2): {np.round(r2,3)}')
    # Return results as a dictionary
    results = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }
    return results
def evaluate_regression_model2(y_true, y_pred):
    """
    Calculate and return evaluation metrics for a regression model.

    Silent variant of evaluate_regression_model (no printing), for use
    inside loops or comparisons.

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.

    Returns:
    - Dictionary with keys 'MSE', 'RMSE', 'MAE', 'R2'.
    """
    # Calculate evaluation metrics
    mse = mean_squared_error(y_true, y_pred)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # take the square root of the already-computed MSE instead.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    # Return results as a dictionary
    results = {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2
    }
    return results
# Returns RSI values
def rsi(df, periods = 14):
    """Relative Strength Index of df['close'] using a Wilder-style EWM average."""
    delta = df['close'].diff()
    # Split each day's move into a gain series and a (positive-valued) loss series.
    gains = delta.clip(lower=0)
    losses = delta.clip(upper=0).mul(-1)
    avg_gain = gains.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
    avg_loss = losses.ewm(com=periods - 1, adjust=True, min_periods=periods).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - 100 / (1 + relative_strength)
def gain(x):
    """Sum of the positive entries of x."""
    return x[x > 0].sum()
def loss(x):
    """Sum of the negative entries of x (returned as a non-positive number)."""
    return x[x < 0].sum()
# Calculate money flow index
# Contributed by a GitHub member and ChatGPT
def mfi(df, n=14):
    """Money Flow Index over an n-day rolling window; returns a numpy array."""
    typical = (df['high'] + df['low'] + df['close']) / 3
    raw_flow = typical * df['volume']
    # Sign each day's money flow by whether the typical price rose or fell
    # versus the previous day (the first day has no prior, so it gets -1).
    direction = np.where(typical > typical.shift(1), 1, -1)
    signed_flow = raw_flow * direction
    # Rolling sums of the positive and negative flow components.
    inflow = pd.Series(np.where(signed_flow > 0, signed_flow, 0)).rolling(n, min_periods=1).sum()
    outflow = pd.Series(np.where(signed_flow < 0, -signed_flow, 0)).rolling(n, min_periods=1).sum()
    money_ratio = inflow / outflow
    return (100 - 100 / (1 + money_ratio)).to_numpy()
def plot_regression_accuracy(y_true, y_pred):
    """
    Create various plots to evaluate the accuracy of a linear regression model.

    Parameters:
    - y_true: Actual values.
    - y_pred: Predicted values.
    """
    residuals = y_true - y_pred

    # 1) Actual vs predicted scatter.
    plt.scatter(y_true, y_pred)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Scatter Plot of Actual vs Predicted Values')
    plt.show()

    # 2) Residuals against predictions — should hover around the zero line.
    plt.scatter(y_pred, residuals)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')
    plt.title('Residual Plot')
    plt.show()

    # 3) Residual distribution — roughly bell-shaped for a well-behaved fit.
    sns.histplot(residuals, kde=True)
    plt.xlabel('Residuals')
    plt.ylabel('Frequency')
    plt.title('Distribution of Residuals')
    plt.show()

    # 4) Scatter with the y = x "perfect fit" reference line.
    plt.plot(y_true, y_true, linestyle='--', color='r', label='Perfect Fit')
    plt.scatter(y_true, y_pred)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Predicted vs Actual Values with Perfect Fit Line')
    plt.legend()
    plt.show()
def plot_predictions(df, prediction):
    """
    Plot truth vs model predictions in two stacked subplots.

    Top: the full close_1d_next series with predictions overlaid on the test
    period (2020 onward). Bottom: the test period only.

    Parameters:
    - df: DataFrame with 'date' and 'close_1d_next' columns.
    - prediction: Predicted values aligned with the rows where date.year >= 2020.
    """
    # Copy the slice so adding the prediction column does not trigger a
    # SettingWithCopyWarning (or silently mutate a view of df).
    plot_test_df = df[df.date.dt.year >= 2020].copy()
    plot_test_df['prediction'] = prediction
    fig = make_subplots(rows=2, cols=1)
    fig.add_trace(go.Scatter(x=df.date, y=df.close_1d_next,
                             name='Truth',
                             marker_color='LightSkyBlue'), row=1, col=1)
    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=plot_test_df.prediction,
                             name='Prediction',
                             marker_color='MediumPurple'), row=1, col=1)
    # Add title and Y-axis title for the first subplot
    fig.update_layout(title_text='Train Data and Test Data', title_x=0.5, title_y=0.9)
    fig.update_yaxes(title_text='Prediction', row=1, col=1)
    # Bug fix: the original referenced the module-level global `y_test` here;
    # use the truth column from the sliced frame so the function is
    # self-contained and works regardless of cell execution order.
    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=plot_test_df.close_1d_next,
                             name='Truth',
                             marker_color='LightSkyBlue',
                             showlegend=False), row=2, col=1)
    fig.add_trace(go.Scatter(x=plot_test_df.date,
                             y=plot_test_df.prediction,
                             name='Prediction',
                             marker_color='MediumPurple',
                             showlegend=False), row=2, col=1)
    fig.update_yaxes(title_text='Prediction', row=2, col=1)
    fig.show()
def plot_feature_importance(model, X_train, top_features):
    """
    Plot the `top_features` largest coefficient magnitudes of a fitted linear model.

    Parameters:
    - model: Fitted estimator exposing a `coef_` attribute (linear models only).
    - X_train: DataFrame whose columns name the features.
    - top_features: Number of top features to display.

    Returns:
    - DataFrame of ALL features sorted by |coefficient| descending.
    """
    # Build and sort the importance table in one pass.
    feature_importance_df = pd.DataFrame({
        'Feature': X_train.columns,
        'Importance': np.abs(model.coef_),
    }).sort_values(by='Importance', ascending=False).reset_index(drop=True)
    # Slice the top rows once (the original re-sliced for every plotting call
    # and contained a no-op `top_features = top_features` assignment).
    top_df = feature_importance_df[:top_features]
    # Plot feature importance
    plt.figure(figsize=(20, 6))
    plt.barh(range(len(top_df)), top_df['Importance'], align="center")
    plt.yticks(range(len(top_df)), labels=top_df['Feature'])
    plt.ylabel("Features")
    plt.xlabel("Coefficient Magnitude")
    plt.title(f"Top {top_features} Feature Importance Values")
    plt.show()
    return feature_importance_df
# Load the pre-exported daily stock data; path is machine-specific.
out_loc = '/Users/isapocan/Desktop/LSU/data/'
df = pd.read_parquet(out_loc+"stock_1d.parquet")
# Normalize column names to lowercase (e.g. 'Adj Close' -> 'adj close').
df.columns = df.columns.str.lower()
df.head()
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2013-01-02 | 94.190002 | 94.790001 | 93.959999 | 94.779999 | 67.895119 | 3206700.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 1 | 2013-01-03 | 94.339996 | 94.930000 | 94.129997 | 94.669998 | 67.816322 | 2704600.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 2 | 2013-01-04 | 94.790001 | 95.480003 | 94.540001 | 95.370003 | 68.317757 | 2704900.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 3 | 2013-01-07 | 95.019997 | 95.730003 | 94.760002 | 95.489998 | 68.403717 | 2745800.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 4 | 2013-01-08 | 95.169998 | 95.750000 | 95.099998 | 95.500000 | 68.410889 | 2655500.0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
# Restrict to a single ticker (Mondelez) for modeling.
df = df[df['symbol']=='MDLZ']
# Trend features. Each is shift()ed one day so a row only sees past data.
# NOTE(review): ewm(9) sets com=9, not span=9 — confirm that is the intended
# EMA definition (span=9 is the more common convention for "EMA 9").
df['ema_9'] = df['close'].ewm(9).mean().shift()
df['sma_5'] = df['close'].rolling(5).mean().shift()
df['sma_10'] = df['close'].rolling(10).mean().shift()
df['sma_15'] = df['close'].rolling(15).mean().shift()
df['sma_30'] = df['close'].rolling(30).mean().shift()
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2733 entries, 852843 to 855575 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date 2733 non-null datetime64[ns] 1 open 2733 non-null float64 2 high 2733 non-null float64 3 low 2733 non-null float64 4 close 2733 non-null float64 5 adj close 2733 non-null float64 6 volume 2733 non-null float64 7 symbol 2733 non-null object 8 security 2733 non-null object 9 gics sector 2733 non-null object 10 gics sub-industry 2733 non-null object 11 headquarters location 2733 non-null object 12 date added 2733 non-null object 13 cik 2733 non-null int64 14 founded 2733 non-null object 15 ema_9 2732 non-null float64 16 sma_5 2728 non-null float64 17 sma_10 2723 non-null float64 18 sma_15 2718 non-null float64 19 sma_30 2703 non-null float64 dtypes: datetime64[ns](1), float64(11), int64(1), object(7) memory usage: 448.4+ KB
# Momentum indicators from the helper functions defined above.
df['rsi'] = rsi(df) #.fillna(0)
# mfi() returns a plain numpy array, so it assigns by position, not by index.
df['mfi'] = mfi(df, 14)
df[['date','close','ema_9','sma_5','sma_10','sma_15','sma_30','rsi','mfi']]
| date | close | ema_9 | sma_5 | sma_10 | sma_15 | sma_30 | rsi | mfi | |
|---|---|---|---|---|---|---|---|---|---|
| 852843 | 2013-01-02 | 26.670000 | NaN | NaN | NaN | NaN | NaN | NaN | 0.000000 |
| 852844 | 2013-01-03 | 26.639999 | 26.670000 | NaN | NaN | NaN | NaN | NaN | 33.904295 |
| 852845 | 2013-01-04 | 26.740000 | 26.654210 | NaN | NaN | NaN | NaN | NaN | 48.695375 |
| 852846 | 2013-01-07 | 26.660000 | 26.685867 | NaN | NaN | NaN | NaN | NaN | 39.919745 |
| 852847 | 2013-01-08 | 26.680000 | 26.678345 | NaN | NaN | NaN | NaN | NaN | 55.233142 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 855571 | 2023-11-02 | 67.970001 | 65.538988 | 65.934001 | 65.321001 | 64.406001 | 65.993000 | 60.257764 | 89.207420 |
| 855572 | 2023-11-03 | 68.820000 | 65.782090 | 66.398001 | 65.697001 | 64.868001 | 65.901667 | 63.726091 | 89.458580 |
| 855573 | 2023-11-06 | 68.239998 | 66.085881 | 67.160001 | 66.169001 | 65.354001 | 65.848000 | 59.885606 | 83.710782 |
| 855574 | 2023-11-07 | 68.489998 | 66.301292 | 67.612000 | 66.594001 | 65.728667 | 65.799000 | 60.977252 | 75.937617 |
| 855575 | 2023-11-08 | 69.019997 | 66.520163 | 68.067999 | 66.888000 | 66.058667 | 65.729667 | 63.259914 | 75.566164 |
2733 rows × 9 columns
df[['rsi','mfi']].corr()
| rsi | mfi | |
|---|---|---|
| rsi | 1.000000 | 0.698958 |
| mfi | 0.698958 | 1.000000 |
# MACD: the gap between the 12- and 26-period EMAs of the close, plus a
# 9-period EMA of the MACD itself as the signal line.
EMA_12 = pd.Series(df['close'].ewm(span=12, min_periods=12).mean())
EMA_26 = pd.Series(df['close'].ewm(span=26, min_periods=26).mean())
df['macd'] = pd.Series(EMA_12 - EMA_26)
df['macd_signal'] = pd.Series(df.macd.ewm(span=9, min_periods=9).mean())
# Preview only rows where both series have warmed up past min_periods.
df[(~df['macd'].isna()) & (~df['macd_signal'].isna())][['macd','macd_signal']].head()
| macd | macd_signal | |
|---|---|---|
| 852876 | -0.147786 | -0.050945 |
| 852877 | -0.175230 | -0.078792 |
| 852878 | -0.198438 | -0.104970 |
| 852879 | -0.235462 | -0.132994 |
| 852880 | -0.226841 | -0.152855 |
### predict next day
# Prediction target: the next trading day's close (shift(-1) looks forward).
df['close_1d_next'] = df['close'].shift(-1)
df[['date','close','close_1d_next']].head()
| date | close | close_1d_next | |
|---|---|---|---|
| 852843 | 2013-01-02 | 26.670000 | 26.639999 |
| 852844 | 2013-01-03 | 26.639999 | 26.740000 |
| 852845 | 2013-01-04 | 26.740000 | 26.660000 |
| 852846 | 2013-01-07 | 26.660000 | 26.680000 |
| 852847 | 2013-01-08 | 26.680000 | 27.049999 |
# Lagged copies of each price/volume series so every row carries its own
# recent history. Dict insertion order reproduces the original column order
# exactly: close, adj close, open, high, low, volume — each with lags of
# 1/3/5 trading days and 1/2/3/4 weeks.
lag_offsets = {'1d': 1, '3d': 3, '5d': 5, '1w': 7, '2w': 14, '3w': 21, '4w': 28}
lag_sources = {'close': 'close', 'adj close': 'adj_close', 'open': 'open',
               'high': 'high', 'low': 'low', 'volume': 'volume'}
for source, prefix in lag_sources.items():
    for label, days in lag_offsets.items():
        df[f'{prefix}_{label}_ago'] = df[source].shift(days)

# Rolling means over several window sizes (open, high, low, volume, adj close
# — again in the original column order).
avg_windows = [3, 5, 7, 10, 15, 30]
avg_sources = {'open': 'open', 'high': 'high', 'low': 'low',
               'volume': 'volume', 'adj close': 'adj_close'}
for source, prefix in avg_sources.items():
    for window in avg_windows:
        df[f'{prefix}_{window}d_avg'] = df[source].rolling(window=window).mean()

# Drop the warm-up rows that lack full history and reindex from zero.
df = df.dropna().reset_index(drop=True)
df.head()
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | ema_9 | sma_5 | sma_10 | sma_15 | sma_30 | rsi | mfi | macd | macd_signal | close_1d_next | close_1d_ago | close_3d_ago | close_5d_ago | close_1w_ago | close_2w_ago | close_3w_ago | close_4w_ago | adj_close_1d_ago | adj_close_3d_ago | adj_close_5d_ago | adj_close_1w_ago | adj_close_2w_ago | adj_close_3w_ago | adj_close_4w_ago | open_1d_ago | open_3d_ago | open_5d_ago | open_1w_ago | open_2w_ago | open_3w_ago | open_4w_ago | high_1d_ago | high_3d_ago | high_5d_ago | high_1w_ago | high_2w_ago | high_3w_ago | high_4w_ago | low_1d_ago | low_3d_ago | low_5d_ago | low_1w_ago | low_2w_ago | low_3w_ago | low_4w_ago | volume_1d_ago | volume_3d_ago | volume_5d_ago | volume_1w_ago | volume_2w_ago | volume_3w_ago | volume_4w_ago | open_3d_avg | open_5d_avg | open_7d_avg | open_10d_avg | open_15d_avg | open_30d_avg | high_3d_avg | high_5d_avg | high_7d_avg | high_10d_avg | high_15d_avg | high_30d_avg | low_3d_avg | low_5d_avg | low_7d_avg | low_10d_avg | low_15d_avg | low_30d_avg | volume_3d_avg | volume_5d_avg | volume_7d_avg | volume_10d_avg | volume_15d_avg | volume_30d_avg | adj_close_3d_avg | adj_close_5d_avg | adj_close_7d_avg | adj_close_10d_avg | adj_close_15d_avg | adj_close_30d_avg | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2013-02-20 | 27.070000 | 27.150000 | 26.950001 | 27.030001 | 21.735399 | 17057200.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.499536 | 27.136 | 27.518 | 27.642000 | 27.589000 | 41.633625 | 53.176274 | -0.147786 | -0.050945 | 26.820000 | 26.959999 | 26.570000 | 27.680000 | 27.76 | 27.730000 | 28.080000 | 27.049999 | 21.679117 | 21.365499 | 22.258080 | 22.322405 | 22.298285 | 22.579723 | 21.751484 | 26.750000 | 26.690001 | 27.700001 | 27.799999 | 27.830000 | 27.969999 | 26.790001 | 27.190001 | 27.020000 | 27.830000 | 28.100000 | 27.980000 | 28.100000 | 27.080000 | 26.750000 | 26.450001 | 27.270000 | 27.750000 | 27.67 | 27.820000 | 26.68 | 18297500.0 | 37728900.0 | 14931000.0 | 11159200.0 | 5800400.0 | 15906900.0 | 11671400.0 | 26.886667 | 27.018 | 27.217143 | 27.386 | 27.553333 | 27.536667 | 27.136667 | 27.248000 | 27.410000 | 27.618 | 27.779333 | 27.754667 | 26.766667 | 26.842 | 27.015714 | 27.224 | 27.411333 | 27.382333 | 1.904973e+07 | 21756140.0 | 1.907480e+07 | 17005580.0 | 1.419575e+07 | 1.352419e+07 | 21.633545 | 21.716101 | 21.878994 | 22.053831 | 22.184635 | 22.194819 |
| 1 | 2013-02-21 | 26.990000 | 27.049999 | 26.639999 | 26.820000 | 21.566534 | 16936600.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.451239 | 27.006 | 27.426 | 27.588667 | 27.601333 | 38.257648 | 47.431888 | -0.175230 | -0.078792 | 26.770000 | 27.030001 | 26.719999 | 27.750000 | 27.75 | 27.790001 | 27.559999 | 27.309999 | 21.735399 | 21.486118 | 22.314371 | 22.314371 | 22.346525 | 22.161583 | 21.960548 | 27.070000 | 26.840000 | 27.740000 | 27.730000 | 27.650000 | 27.730000 | 27.129999 | 27.150000 | 27.070000 | 27.809999 | 27.799999 | 27.950001 | 28.040001 | 27.340000 | 26.950001 | 26.600000 | 27.459999 | 27.629999 | 27.65 | 27.299999 | 27.09 | 17057200.0 | 21794500.0 | 13902600.0 | 9811900.0 | 7541300.0 | 18213200.0 | 16348500.0 | 26.936666 | 26.868 | 27.111429 | 27.295 | 27.497333 | 27.552333 | 27.130000 | 27.096000 | 27.302857 | 27.510 | 27.717333 | 27.759000 | 26.780000 | 26.678 | 26.874286 | 27.108 | 27.342667 | 27.388333 | 1.743043e+07 | 22362940.0 | 2.009261e+07 | 17608410.0 | 1.493817e+07 | 1.361005e+07 | 21.660350 | 21.566534 | 21.772160 | 21.958945 | 22.135851 | 22.198572 |
| 2 | 2013-02-22 | 26.889999 | 27.129999 | 26.730000 | 26.770000 | 21.526327 | 16664800.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.386494 | 26.820 | 27.308 | 27.528000 | 27.606000 | 37.478423 | 48.958416 | -0.198438 | -0.104970 | 26.490000 | 26.820000 | 26.959999 | 26.570000 | 27.68 | 28.219999 | 27.790001 | 27.420000 | 21.566534 | 21.679117 | 21.365499 | 22.258080 | 22.692308 | 22.346525 | 22.049007 | 26.990000 | 26.750000 | 26.690001 | 27.700001 | 28.000000 | 27.500000 | 27.350000 | 27.049999 | 27.190001 | 27.020000 | 27.830000 | 28.320000 | 27.889999 | 27.540001 | 26.639999 | 26.750000 | 26.450001 | 27.270000 | 27.93 | 27.350000 | 27.25 | 16936600.0 | 18297500.0 | 37728900.0 | 14931000.0 | 9623100.0 | 15212300.0 | 10162600.0 | 26.983333 | 26.908 | 26.995714 | 27.220 | 27.446667 | 27.555667 | 27.109999 | 27.118000 | 27.202857 | 27.415 | 27.662666 | 27.760667 | 26.773333 | 26.734 | 26.797143 | 27.023 | 27.281333 | 27.390000 | 1.688620e+07 | 18150120.0 | 2.034030e+07 | 17828420.0 | 1.554640e+07 | 1.377650e+07 | 21.609420 | 21.598699 | 21.667624 | 21.856822 | 22.081171 | 22.191067 |
| 3 | 2013-02-25 | 26.790001 | 27.080000 | 26.480000 | 26.490000 | 21.301172 | 15527100.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.323424 | 26.860 | 27.181 | 27.460000 | 27.596667 | 33.378362 | 47.675126 | -0.235462 | -0.132994 | 26.950001 | 26.770000 | 27.030001 | 26.719999 | 27.75 | 27.879999 | 27.830000 | 27.480000 | 21.526327 | 21.735399 | 21.486118 | 22.314371 | 22.418896 | 22.378695 | 22.097254 | 26.889999 | 27.070000 | 26.840000 | 27.740000 | 28.010000 | 27.930000 | 27.459999 | 27.129999 | 27.150000 | 27.070000 | 27.809999 | 28.150000 | 28.030001 | 27.520000 | 26.730000 | 26.950001 | 26.600000 | 27.459999 | 27.83 | 27.639999 | 27.17 | 16664800.0 | 17057200.0 | 21794500.0 | 13902600.0 | 8954300.0 | 14444500.0 | 8688200.0 | 26.890000 | 26.898 | 26.860000 | 27.119 | 27.366000 | 27.544333 | 27.086666 | 27.120000 | 27.098571 | 27.313 | 27.580000 | 27.752000 | 26.616666 | 26.710 | 26.657143 | 26.896 | 27.184667 | 27.369667 | 1.637617e+07 | 16896640.0 | 2.057237e+07 | 18265210.0 | 1.594000e+07 | 1.374912e+07 | 21.464678 | 21.561710 | 21.522881 | 21.754699 | 21.988429 | 22.169087 |
| 4 | 2013-02-26 | 26.530001 | 26.980000 | 26.510000 | 26.950001 | 21.671074 | 13702900.0 | MDLZ | Mondelez International | Consumer Staples | Packaged Foods & Meats | Chicago, Illinois | 2012-10-02 | 1103982 | 2012 | 27.238357 | 26.814 | 27.054 | 27.344667 | 27.569333 | 44.181951 | 48.178912 | -0.226841 | -0.152855 | 27.570000 | 26.490000 | 26.820000 | 26.959999 | 26.57 | 27.950001 | 27.780001 | 27.709999 | 21.301172 | 21.566534 | 21.679117 | 21.365499 | 22.475189 | 22.338484 | 22.282200 | 26.790001 | 26.990000 | 26.750000 | 26.690001 | 27.950001 | 27.830000 | 27.580000 | 27.080000 | 27.049999 | 27.190001 | 27.020000 | 28.110001 | 27.889999 | 27.740000 | 26.480000 | 26.639999 | 26.750000 | 26.450001 | 27.85 | 27.690001 | 27.34 | 15527100.0 | 16936600.0 | 18297500.0 | 37728900.0 | 10961400.0 | 12066800.0 | 9863200.0 | 26.736667 | 26.854 | 26.837143 | 26.999 | 27.267333 | 27.517000 | 27.063333 | 27.077999 | 27.092857 | 27.231 | 27.502000 | 27.733333 | 26.573333 | 26.662 | 26.665714 | 26.784 | 27.096667 | 27.345000 | 1.529827e+07 | 15977720.0 | 1.714009e+07 | 18654310.0 | 1.625657e+07 | 1.386713e+07 | 21.499524 | 21.560101 | 21.566535 | 21.690369 | 21.938574 | 22.156490 |
# # Calculate the index for the 70-30 split
# split_index = int(0.7 * len(df))
# # Split the DataFrame into training and testing sets
# train_df = df.iloc[:split_index]
# test_df = df.iloc[split_index:]
# Split the DataFrame into training and testing sets
# Chronological split: train on pre-2020 data, test on 2020 onward. No
# shuffling — random splits would leak future information in a time series.
train_df = df[df.date.dt.year<2020]
test_df = df[df.date.dt.year>=2020]
print(f"Train days: {len(train_df)}, Test days: {len(test_df)}")
# Visualize where the train/test boundary falls on the target series.
fig = go.Figure()
fig.add_trace(go.Scatter(x=train_df.date, y=train_df.close_1d_next, name='Training'))
fig.add_trace(go.Scatter(x=test_df.date, y=test_df.close_1d_next, name='Test'))
fig.show()
Train days: 1729, Test days: 970
# Columns excluded from the feature matrix: identifiers, metadata, and the
# raw same-day prices (only lagged/derived versions may be used as features).
drop_cols1 = ['date','open','high','low','close','adj close','volume','symbol','security',
              'gics sector','gics sub-industry','headquarters location','date added','cik','founded']
# Use the `columns=` keyword: passing axis positionally (`.drop(cols, 1)`)
# was deprecated in pandas 1.1 and removed in pandas 2.0.
train_df = train_df.drop(columns=drop_cols1)
test_df = test_df.drop(columns=drop_cols1)
# target column is next day's close price
y_train = train_df['close_1d_next'].copy()
X_train = train_df.drop(columns=['close_1d_next'])
# target column is next day's close price
y_test = test_df['close_1d_next'].copy()
X_test = test_df.drop(columns=['close_1d_next'])
# Fit the scaler on the training set only, then apply the same transform to
# the test set (prevents test-set leakage into the scaling statistics).
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# NOTE(review): the model comparison below is invoked with the UNSCALED
# X_train/X_test — confirm whether the scaled arrays were intended instead.
def train_and_evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test):
    """
    Train and evaluate a suite of regression models on a fixed train/test split.

    Parameters:
    - X_train_scaled: Training features.
    - y_train: Training target.
    - X_test_scaled: Test features.
    - y_test: Test target.

    Returns:
    - DataFrame with one row per model (MSE, MAE, R2 Score, training time),
      sorted by R2 Score descending.
    """
    # Initialize the models (all with library-default hyperparameters).
    models = {
        'Linear Regression': LinearRegression(),
        'Ridge Regression': Ridge(),
        'Lasso Regression': Lasso(),
        'Elastic Net': ElasticNet(),
        'SVR': SVR(),
        'K-Neighbors Regressor': KNeighborsRegressor(),
        'Decision Tree': DecisionTreeRegressor(),
        'Random Forest': RandomForestRegressor(),
        'Gradient Boosting': GradientBoostingRegressor(),
        'AdaBoost': AdaBoostRegressor(),
        'XGBoost': XGBRegressor(),
        # verbose=0 silences CatBoost's per-iteration training log.
        'CatBoost': CatBoostRegressor(verbose=0)
    }
    # Collect rows in a list and build the DataFrame once at the end:
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
    # and calling it in a loop is quadratic.
    rows = []
    for model_name, model in models.items():
        start_time = time.time()
        # Train the model and time the fit.
        model.fit(X_train_scaled, y_train)
        training_time = time.time() - start_time
        # Evaluate on the held-out test set.
        y_pred = model.predict(X_test_scaled)
        rows.append({
            'Model': model_name,
            'Mean Squared Error': mean_squared_error(y_test, y_pred),
            'Mean Absolute Error': mean_absolute_error(y_test, y_pred),
            'R2 Score': r2_score(y_test, y_pred),
            'Training Time (s)': training_time
        })
    metrics_df = pd.DataFrame(rows).sort_values(by=['R2 Score'], ascending=False)
    return metrics_df
# FIX: pass the SCALED feature matrices prepared above — the original call
# passed the raw X_train/X_test, silently discarding the StandardScaler step,
# which matters for scale-sensitive models such as SVR and KNeighbors.
df_compare = train_and_evaluate_models(X_train_scaled, y_train, X_test_scaled, y_test)
Learning rate set to 0.044643 0: learn: 6.0670144 total: 62.4ms remaining: 1m 2s 1: learn: 5.8228277 total: 65.9ms remaining: 32.9s 2: learn: 5.5825438 total: 69.8ms remaining: 23.2s 3: learn: 5.3589845 total: 73.3ms remaining: 18.3s 4: learn: 5.1400669 total: 76.9ms remaining: 15.3s 5: learn: 4.9338718 total: 80.6ms remaining: 13.4s 6: learn: 4.7348036 total: 84.5ms remaining: 12s 7: learn: 4.5462470 total: 87.8ms remaining: 10.9s 8: learn: 4.3663666 total: 91.5ms remaining: 10.1s 9: learn: 4.1940087 total: 94.9ms remaining: 9.39s 10: learn: 4.0326464 total: 98.6ms remaining: 8.87s 11: learn: 3.8728104 total: 102ms remaining: 8.4s 12: learn: 3.7241421 total: 106ms remaining: 8.01s 13: learn: 3.5819820 total: 109ms remaining: 7.69s 14: learn: 3.4443416 total: 113ms remaining: 7.41s 15: learn: 3.3131010 total: 116ms remaining: 7.15s 16: learn: 3.1862951 total: 120ms remaining: 6.93s 17: learn: 3.0636007 total: 123ms remaining: 6.72s 18: learn: 2.9525545 total: 127ms remaining: 6.55s 19: learn: 2.8449392 total: 130ms remaining: 6.39s 20: learn: 2.7377198 total: 134ms remaining: 6.24s 21: learn: 2.6359381 total: 137ms remaining: 6.1s 22: learn: 2.5393906 total: 141ms remaining: 5.97s 23: learn: 2.4447919 total: 144ms remaining: 5.85s 24: learn: 2.3592925 total: 148ms remaining: 5.76s 25: learn: 2.2778166 total: 151ms remaining: 5.67s 26: learn: 2.1955454 total: 155ms remaining: 5.58s 27: learn: 2.1181936 total: 158ms remaining: 5.5s 28: learn: 2.0456841 total: 162ms remaining: 5.41s 29: learn: 1.9773187 total: 165ms remaining: 5.35s 30: learn: 1.9140404 total: 169ms remaining: 5.27s 31: learn: 1.8503968 total: 172ms remaining: 5.2s 32: learn: 1.7872718 total: 175ms remaining: 5.13s 33: learn: 1.7298101 total: 178ms remaining: 5.07s 34: learn: 1.6773427 total: 182ms remaining: 5.01s 35: learn: 1.6263746 total: 185ms remaining: 4.95s 36: learn: 1.5799050 total: 188ms remaining: 4.89s 37: learn: 1.5350898 total: 191ms remaining: 4.84s 38: learn: 1.4896754 total: 195ms 
remaining: 4.8s 39: learn: 1.4440908 total: 199ms remaining: 4.76s 40: learn: 1.4038854 total: 202ms remaining: 4.72s 41: learn: 1.3654869 total: 206ms remaining: 4.69s 42: learn: 1.3304415 total: 210ms remaining: 4.67s 43: learn: 1.2940654 total: 214ms remaining: 4.65s 44: learn: 1.2608561 total: 217ms remaining: 4.61s 45: learn: 1.2267358 total: 220ms remaining: 4.57s 46: learn: 1.1934982 total: 223ms remaining: 4.53s 47: learn: 1.1672852 total: 227ms remaining: 4.5s 48: learn: 1.1375280 total: 230ms remaining: 4.47s 49: learn: 1.1138729 total: 233ms remaining: 4.43s 50: learn: 1.0899582 total: 236ms remaining: 4.4s 51: learn: 1.0649512 total: 239ms remaining: 4.37s 52: learn: 1.0415461 total: 243ms remaining: 4.34s 53: learn: 1.0206758 total: 246ms remaining: 4.31s 54: learn: 0.9991569 total: 249ms remaining: 4.28s 55: learn: 0.9795833 total: 253ms remaining: 4.26s 56: learn: 0.9603286 total: 256ms remaining: 4.23s 57: learn: 0.9430651 total: 259ms remaining: 4.21s 58: learn: 0.9257478 total: 263ms remaining: 4.19s 59: learn: 0.9087157 total: 266ms remaining: 4.16s 60: learn: 0.8934800 total: 269ms remaining: 4.14s 61: learn: 0.8810773 total: 272ms remaining: 4.11s 62: learn: 0.8666743 total: 275ms remaining: 4.09s 63: learn: 0.8534041 total: 278ms remaining: 4.07s 64: learn: 0.8414198 total: 282ms remaining: 4.05s 65: learn: 0.8286451 total: 285ms remaining: 4.03s 66: learn: 0.8182783 total: 288ms remaining: 4.01s 67: learn: 0.8072075 total: 291ms remaining: 3.99s 68: learn: 0.7983925 total: 294ms remaining: 3.97s 69: learn: 0.7894196 total: 297ms remaining: 3.95s 70: learn: 0.7802497 total: 301ms remaining: 3.93s 71: learn: 0.7722262 total: 304ms remaining: 3.92s 72: learn: 0.7636665 total: 307ms remaining: 3.9s 73: learn: 0.7559532 total: 310ms remaining: 3.88s 74: learn: 0.7484620 total: 314ms remaining: 3.87s 75: learn: 0.7409614 total: 317ms remaining: 3.85s 76: learn: 0.7340694 total: 320ms remaining: 3.83s 77: learn: 0.7272183 total: 323ms remaining: 
3.82s 78: learn: 0.7206823 total: 327ms remaining: 3.81s 79: learn: 0.7145614 total: 330ms remaining: 3.79s 80: learn: 0.7090921 total: 333ms remaining: 3.78s 81: learn: 0.7039718 total: 336ms remaining: 3.76s 82: learn: 0.6989219 total: 339ms remaining: 3.75s 83: learn: 0.6946642 total: 342ms remaining: 3.73s 84: learn: 0.6902238 total: 345ms remaining: 3.72s 85: learn: 0.6850230 total: 348ms remaining: 3.7s 86: learn: 0.6809441 total: 352ms remaining: 3.69s 87: learn: 0.6778489 total: 355ms remaining: 3.67s 88: learn: 0.6740331 total: 358ms remaining: 3.66s 89: learn: 0.6704494 total: 361ms remaining: 3.65s 90: learn: 0.6661259 total: 364ms remaining: 3.63s 91: learn: 0.6626042 total: 367ms remaining: 3.62s 92: learn: 0.6590292 total: 370ms remaining: 3.61s 93: learn: 0.6554413 total: 373ms remaining: 3.6s 94: learn: 0.6526417 total: 376ms remaining: 3.59s 95: learn: 0.6490439 total: 379ms remaining: 3.57s 96: learn: 0.6467224 total: 383ms remaining: 3.56s 97: learn: 0.6432002 total: 386ms remaining: 3.55s 98: learn: 0.6412776 total: 389ms remaining: 3.54s 99: learn: 0.6389192 total: 393ms remaining: 3.53s 100: learn: 0.6359101 total: 396ms remaining: 3.52s 101: learn: 0.6322537 total: 399ms remaining: 3.52s 102: learn: 0.6295722 total: 403ms remaining: 3.51s 103: learn: 0.6266020 total: 407ms remaining: 3.5s 104: learn: 0.6242294 total: 410ms remaining: 3.49s 105: learn: 0.6217499 total: 413ms remaining: 3.48s 106: learn: 0.6204236 total: 417ms remaining: 3.48s 107: learn: 0.6179910 total: 421ms remaining: 3.47s 108: learn: 0.6154061 total: 424ms remaining: 3.46s 109: learn: 0.6137596 total: 427ms remaining: 3.45s 110: learn: 0.6117283 total: 430ms remaining: 3.44s 111: learn: 0.6098140 total: 433ms remaining: 3.44s 112: learn: 0.6075496 total: 437ms remaining: 3.43s 113: learn: 0.6058904 total: 440ms remaining: 3.42s 114: learn: 0.6038122 total: 443ms remaining: 3.41s 115: learn: 0.6021583 total: 446ms remaining: 3.4s 116: learn: 0.6007633 total: 449ms 
remaining: 3.39s 117: learn: 0.5989211 total: 453ms remaining: 3.38s 118: learn: 0.5972269 total: 456ms remaining: 3.37s 119: learn: 0.5953529 total: 459ms remaining: 3.37s 120: learn: 0.5937351 total: 462ms remaining: 3.36s 121: learn: 0.5917424 total: 465ms remaining: 3.35s 122: learn: 0.5902046 total: 468ms remaining: 3.34s 123: learn: 0.5883267 total: 472ms remaining: 3.33s 124: learn: 0.5870216 total: 475ms remaining: 3.33s 125: learn: 0.5853003 total: 478ms remaining: 3.32s 126: learn: 0.5843247 total: 481ms remaining: 3.31s 127: learn: 0.5823154 total: 485ms remaining: 3.3s 128: learn: 0.5809541 total: 488ms remaining: 3.29s 129: learn: 0.5789569 total: 491ms remaining: 3.29s 130: learn: 0.5773752 total: 494ms remaining: 3.28s 131: learn: 0.5752670 total: 498ms remaining: 3.27s 132: learn: 0.5737710 total: 501ms remaining: 3.26s 133: learn: 0.5722145 total: 504ms remaining: 3.26s 134: learn: 0.5706925 total: 507ms remaining: 3.25s 135: learn: 0.5696125 total: 510ms remaining: 3.24s 136: learn: 0.5684057 total: 513ms remaining: 3.23s 137: learn: 0.5668800 total: 517ms remaining: 3.23s 138: learn: 0.5652156 total: 521ms remaining: 3.23s 139: learn: 0.5641754 total: 525ms remaining: 3.22s 140: learn: 0.5627745 total: 528ms remaining: 3.21s 141: learn: 0.5621379 total: 531ms remaining: 3.21s 142: learn: 0.5609717 total: 534ms remaining: 3.2s 143: learn: 0.5595847 total: 538ms remaining: 3.2s 144: learn: 0.5582880 total: 541ms remaining: 3.19s 145: learn: 0.5573794 total: 544ms remaining: 3.18s 146: learn: 0.5561539 total: 547ms remaining: 3.17s 147: learn: 0.5548667 total: 551ms remaining: 3.17s 148: learn: 0.5533491 total: 554ms remaining: 3.16s 149: learn: 0.5519277 total: 557ms remaining: 3.15s 150: learn: 0.5510423 total: 560ms remaining: 3.15s 151: learn: 0.5500217 total: 563ms remaining: 3.14s 152: learn: 0.5485391 total: 567ms remaining: 3.14s 153: learn: 0.5472895 total: 570ms remaining: 3.13s 154: learn: 0.5464558 total: 573ms remaining: 3.12s 155: 
learn: 0.5456874 total: 576ms remaining: 3.12s 156: learn: 0.5443351 total: 579ms remaining: 3.11s 157: learn: 0.5431722 total: 582ms remaining: 3.1s 158: learn: 0.5420827 total: 586ms remaining: 3.1s 159: learn: 0.5404464 total: 589ms remaining: 3.09s 160: learn: 0.5394812 total: 593ms remaining: 3.09s 161: learn: 0.5383878 total: 596ms remaining: 3.08s 162: learn: 0.5371991 total: 600ms remaining: 3.08s 163: learn: 0.5360893 total: 604ms remaining: 3.08s 164: learn: 0.5348795 total: 607ms remaining: 3.07s 165: learn: 0.5339208 total: 610ms remaining: 3.07s 166: learn: 0.5325131 total: 613ms remaining: 3.06s 167: learn: 0.5314276 total: 617ms remaining: 3.06s 168: learn: 0.5303847 total: 621ms remaining: 3.05s 169: learn: 0.5294339 total: 625ms remaining: 3.05s 170: learn: 0.5284374 total: 629ms remaining: 3.05s 171: learn: 0.5275295 total: 632ms remaining: 3.04s 172: learn: 0.5267194 total: 636ms remaining: 3.04s 173: learn: 0.5256977 total: 639ms remaining: 3.03s 174: learn: 0.5240502 total: 642ms remaining: 3.03s 175: learn: 0.5231121 total: 645ms remaining: 3.02s 176: learn: 0.5219285 total: 649ms remaining: 3.02s 177: learn: 0.5212026 total: 653ms remaining: 3.01s 178: learn: 0.5201152 total: 656ms remaining: 3.01s 179: learn: 0.5191207 total: 659ms remaining: 3s 180: learn: 0.5184563 total: 663ms remaining: 3s 181: learn: 0.5175407 total: 667ms remaining: 3s 182: learn: 0.5164491 total: 670ms remaining: 2.99s 183: learn: 0.5152700 total: 673ms remaining: 2.98s 184: learn: 0.5144159 total: 676ms remaining: 2.98s 185: learn: 0.5133076 total: 680ms remaining: 2.98s 186: learn: 0.5120716 total: 683ms remaining: 2.97s 187: learn: 0.5112054 total: 687ms remaining: 2.97s 188: learn: 0.5100507 total: 690ms remaining: 2.96s 189: learn: 0.5090646 total: 693ms remaining: 2.95s 190: learn: 0.5079116 total: 697ms remaining: 2.95s 191: learn: 0.5068761 total: 700ms remaining: 2.95s 192: learn: 0.5059156 total: 703ms remaining: 2.94s 193: learn: 0.5046199 total: 707ms 
remaining: 2.94s 194: learn: 0.5036700 total: 711ms remaining: 2.93s 195: learn: 0.5030163 total: 714ms remaining: 2.93s 196: learn: 0.5021730 total: 717ms remaining: 2.92s 197: learn: 0.5013725 total: 720ms remaining: 2.92s 198: learn: 0.5003361 total: 723ms remaining: 2.91s 199: learn: 0.4992527 total: 726ms remaining: 2.9s 200: learn: 0.4982242 total: 730ms remaining: 2.9s 201: learn: 0.4969857 total: 733ms remaining: 2.9s 202: learn: 0.4964471 total: 736ms remaining: 2.89s 203: learn: 0.4955578 total: 740ms remaining: 2.88s 204: learn: 0.4948073 total: 743ms remaining: 2.88s 205: learn: 0.4938657 total: 746ms remaining: 2.88s 206: learn: 0.4927562 total: 750ms remaining: 2.87s 207: learn: 0.4922795 total: 753ms remaining: 2.87s 208: learn: 0.4912653 total: 756ms remaining: 2.86s 209: learn: 0.4901704 total: 759ms remaining: 2.85s 210: learn: 0.4890394 total: 763ms remaining: 2.85s 211: learn: 0.4880374 total: 766ms remaining: 2.85s 212: learn: 0.4869604 total: 769ms remaining: 2.84s 213: learn: 0.4857559 total: 773ms remaining: 2.84s 214: learn: 0.4849108 total: 776ms remaining: 2.83s 215: learn: 0.4840655 total: 780ms remaining: 2.83s 216: learn: 0.4832522 total: 783ms remaining: 2.83s 217: learn: 0.4823870 total: 786ms remaining: 2.82s 218: learn: 0.4811295 total: 789ms remaining: 2.81s 219: learn: 0.4800552 total: 793ms remaining: 2.81s 220: learn: 0.4795163 total: 796ms remaining: 2.81s 221: learn: 0.4786248 total: 800ms remaining: 2.8s 222: learn: 0.4778513 total: 804ms remaining: 2.8s 223: learn: 0.4771565 total: 807ms remaining: 2.8s 224: learn: 0.4763662 total: 811ms remaining: 2.79s 225: learn: 0.4752713 total: 815ms remaining: 2.79s 226: learn: 0.4744328 total: 818ms remaining: 2.78s 227: learn: 0.4736467 total: 821ms remaining: 2.78s 228: learn: 0.4726439 total: 825ms remaining: 2.78s 229: learn: 0.4720170 total: 828ms remaining: 2.77s 230: learn: 0.4709224 total: 832ms remaining: 2.77s 231: learn: 0.4703208 total: 835ms remaining: 2.76s 232: learn: 
0.4694894 total: 838ms remaining: 2.76s 233: learn: 0.4687498 total: 841ms remaining: 2.75s 234: learn: 0.4679239 total: 845ms remaining: 2.75s 235: learn: 0.4670138 total: 848ms remaining: 2.75s 236: learn: 0.4663092 total: 851ms remaining: 2.74s 237: learn: 0.4655198 total: 855ms remaining: 2.74s 238: learn: 0.4644130 total: 858ms remaining: 2.73s 239: learn: 0.4633542 total: 862ms remaining: 2.73s 240: learn: 0.4624652 total: 865ms remaining: 2.73s 241: learn: 0.4612790 total: 869ms remaining: 2.72s 242: learn: 0.4603322 total: 873ms remaining: 2.72s 243: learn: 0.4595621 total: 876ms remaining: 2.71s 244: learn: 0.4590279 total: 879ms remaining: 2.71s 245: learn: 0.4581471 total: 882ms remaining: 2.7s 246: learn: 0.4575228 total: 886ms remaining: 2.7s 247: learn: 0.4573403 total: 889ms remaining: 2.69s 248: learn: 0.4564604 total: 892ms remaining: 2.69s 249: learn: 0.4556477 total: 896ms remaining: 2.69s 250: learn: 0.4546236 total: 899ms remaining: 2.68s 251: learn: 0.4533804 total: 902ms remaining: 2.68s 252: learn: 0.4528083 total: 906ms remaining: 2.67s 253: learn: 0.4522885 total: 909ms remaining: 2.67s 254: learn: 0.4516671 total: 912ms remaining: 2.67s 255: learn: 0.4509067 total: 916ms remaining: 2.66s 256: learn: 0.4502604 total: 919ms remaining: 2.65s 257: learn: 0.4496600 total: 922ms remaining: 2.65s 258: learn: 0.4490141 total: 925ms remaining: 2.65s 259: learn: 0.4483730 total: 928ms remaining: 2.64s 260: learn: 0.4476013 total: 931ms remaining: 2.64s 261: learn: 0.4464556 total: 934ms remaining: 2.63s 262: learn: 0.4460153 total: 938ms remaining: 2.63s 263: learn: 0.4452759 total: 941ms remaining: 2.62s 264: learn: 0.4448251 total: 945ms remaining: 2.62s 265: learn: 0.4439872 total: 948ms remaining: 2.62s 266: learn: 0.4432581 total: 951ms remaining: 2.61s 267: learn: 0.4425783 total: 955ms remaining: 2.61s 268: learn: 0.4420917 total: 958ms remaining: 2.6s 269: learn: 0.4414548 total: 961ms remaining: 2.6s 270: learn: 0.4406199 total: 965ms 
remaining: 2.59s 271: learn: 0.4401848 total: 968ms remaining: 2.59s 272: learn: 0.4396981 total: 971ms remaining: 2.59s 273: learn: 0.4391266 total: 974ms remaining: 2.58s 274: learn: 0.4389288 total: 978ms remaining: 2.58s 275: learn: 0.4383482 total: 981ms remaining: 2.57s 276: learn: 0.4374541 total: 984ms remaining: 2.57s 277: learn: 0.4366817 total: 988ms remaining: 2.56s 278: learn: 0.4361637 total: 991ms remaining: 2.56s 279: learn: 0.4355985 total: 995ms remaining: 2.56s 280: learn: 0.4349846 total: 998ms remaining: 2.55s 281: learn: 0.4343480 total: 1s remaining: 2.55s 282: learn: 0.4340070 total: 1s remaining: 2.54s 283: learn: 0.4330370 total: 1.01s remaining: 2.54s 284: learn: 0.4323418 total: 1.01s remaining: 2.54s 285: learn: 0.4320533 total: 1.01s remaining: 2.53s 286: learn: 0.4312878 total: 1.02s remaining: 2.53s 287: learn: 0.4302917 total: 1.02s remaining: 2.52s 288: learn: 0.4296243 total: 1.02s remaining: 2.52s 289: learn: 0.4289696 total: 1.03s remaining: 2.51s 290: learn: 0.4280839 total: 1.03s remaining: 2.51s 291: learn: 0.4276341 total: 1.03s remaining: 2.51s 292: learn: 0.4273110 total: 1.04s remaining: 2.5s 293: learn: 0.4264972 total: 1.04s remaining: 2.5s 294: learn: 0.4260751 total: 1.04s remaining: 2.49s 295: learn: 0.4250758 total: 1.05s remaining: 2.49s 296: learn: 0.4242247 total: 1.05s remaining: 2.48s 297: learn: 0.4237303 total: 1.05s remaining: 2.48s 298: learn: 0.4229408 total: 1.06s remaining: 2.48s 299: learn: 0.4221990 total: 1.06s remaining: 2.47s 300: learn: 0.4216592 total: 1.06s remaining: 2.47s 301: learn: 0.4215269 total: 1.06s remaining: 2.46s 302: learn: 0.4209301 total: 1.07s remaining: 2.46s 303: learn: 0.4204768 total: 1.07s remaining: 2.45s 304: learn: 0.4199938 total: 1.07s remaining: 2.45s 305: learn: 0.4190819 total: 1.08s remaining: 2.45s 306: learn: 0.4186059 total: 1.08s remaining: 2.44s 307: learn: 0.4179422 total: 1.08s remaining: 2.44s 308: learn: 0.4173496 total: 1.09s remaining: 2.43s 309: learn: 
0.4165331 total: 1.09s remaining: 2.43s 310: learn: 0.4156881 total: 1.09s remaining: 2.42s 311: learn: 0.4150547 total: 1.1s remaining: 2.42s 312: learn: 0.4144039 total: 1.1s remaining: 2.42s 313: learn: 0.4137554 total: 1.1s remaining: 2.41s 314: learn: 0.4128423 total: 1.11s remaining: 2.41s 315: learn: 0.4121851 total: 1.11s remaining: 2.4s 316: learn: 0.4120652 total: 1.11s remaining: 2.4s 317: learn: 0.4119489 total: 1.12s remaining: 2.4s 318: learn: 0.4113800 total: 1.12s remaining: 2.39s 319: learn: 0.4110944 total: 1.12s remaining: 2.39s 320: learn: 0.4103885 total: 1.13s remaining: 2.38s 321: learn: 0.4098944 total: 1.13s remaining: 2.38s 322: learn: 0.4096331 total: 1.13s remaining: 2.38s 323: learn: 0.4092385 total: 1.14s remaining: 2.37s 324: learn: 0.4088671 total: 1.14s remaining: 2.37s 325: learn: 0.4085054 total: 1.14s remaining: 2.36s 326: learn: 0.4079698 total: 1.15s remaining: 2.36s 327: learn: 0.4075026 total: 1.15s remaining: 2.35s 328: learn: 0.4065781 total: 1.15s remaining: 2.35s 329: learn: 0.4061529 total: 1.16s remaining: 2.35s 330: learn: 0.4057269 total: 1.16s remaining: 2.34s 331: learn: 0.4050642 total: 1.16s remaining: 2.34s 332: learn: 0.4047541 total: 1.17s remaining: 2.33s 333: learn: 0.4044138 total: 1.17s remaining: 2.34s 334: learn: 0.4040232 total: 1.17s remaining: 2.33s 335: learn: 0.4031800 total: 1.18s remaining: 2.33s 336: learn: 0.4026754 total: 1.18s remaining: 2.32s 337: learn: 0.4022685 total: 1.19s remaining: 2.32s 338: learn: 0.4017881 total: 1.19s remaining: 2.32s 339: learn: 0.4015254 total: 1.19s remaining: 2.31s 340: learn: 0.4009658 total: 1.2s remaining: 2.31s 341: learn: 0.4008354 total: 1.2s remaining: 2.31s 342: learn: 0.4002425 total: 1.2s remaining: 2.3s 343: learn: 0.3997084 total: 1.21s remaining: 2.3s 344: learn: 0.3992679 total: 1.21s remaining: 2.29s 345: learn: 0.3989327 total: 1.21s remaining: 2.29s 346: learn: 0.3983560 total: 1.22s remaining: 2.29s 347: learn: 0.3978115 total: 1.22s remaining: 
2.28s 348: learn: 0.3972325 total: 1.22s remaining: 2.28s 349: learn: 0.3970591 total: 1.23s remaining: 2.27s 350: learn: 0.3967434 total: 1.23s remaining: 2.27s 351: learn: 0.3964092 total: 1.23s remaining: 2.27s 352: learn: 0.3959126 total: 1.24s remaining: 2.26s 353: learn: 0.3956668 total: 1.24s remaining: 2.26s 354: learn: 0.3952955 total: 1.24s remaining: 2.25s 355: learn: 0.3948577 total: 1.25s remaining: 2.25s 356: learn: 0.3942103 total: 1.25s remaining: 2.25s 357: learn: 0.3934125 total: 1.25s remaining: 2.24s 358: learn: 0.3929393 total: 1.25s remaining: 2.24s 359: learn: 0.3923515 total: 1.26s remaining: 2.24s 360: learn: 0.3918921 total: 1.26s remaining: 2.23s 361: learn: 0.3913288 total: 1.26s remaining: 2.23s 362: learn: 0.3909709 total: 1.27s remaining: 2.22s 363: learn: 0.3907805 total: 1.27s remaining: 2.22s 364: learn: 0.3902974 total: 1.27s remaining: 2.22s 365: learn: 0.3897694 total: 1.28s remaining: 2.21s 366: learn: 0.3895400 total: 1.28s remaining: 2.21s 367: learn: 0.3887777 total: 1.28s remaining: 2.2s 368: learn: 0.3879556 total: 1.29s remaining: 2.2s 369: learn: 0.3872718 total: 1.29s remaining: 2.2s 370: learn: 0.3866534 total: 1.29s remaining: 2.19s 371: learn: 0.3859462 total: 1.3s remaining: 2.19s 372: learn: 0.3853502 total: 1.3s remaining: 2.19s 373: learn: 0.3851891 total: 1.3s remaining: 2.18s 374: learn: 0.3846603 total: 1.31s remaining: 2.18s 375: learn: 0.3844259 total: 1.31s remaining: 2.17s 376: learn: 0.3838207 total: 1.31s remaining: 2.17s 377: learn: 0.3833191 total: 1.32s remaining: 2.17s 378: learn: 0.3826555 total: 1.32s remaining: 2.16s 379: learn: 0.3825492 total: 1.32s remaining: 2.16s 380: learn: 0.3819978 total: 1.33s remaining: 2.15s 381: learn: 0.3819036 total: 1.33s remaining: 2.15s 382: learn: 0.3814676 total: 1.33s remaining: 2.15s 383: learn: 0.3806646 total: 1.34s remaining: 2.14s 384: learn: 0.3801625 total: 1.34s remaining: 2.14s 385: learn: 0.3795941 total: 1.34s remaining: 2.13s 386: learn: 0.3792967 
total: 1.34s remaining: 2.13s 387: learn: 0.3786377 total: 1.35s remaining: 2.13s 388: learn: 0.3781177 total: 1.35s remaining: 2.12s 389: learn: 0.3776432 total: 1.35s remaining: 2.12s 390: learn: 0.3771742 total: 1.36s remaining: 2.12s 391: learn: 0.3767596 total: 1.36s remaining: 2.11s 392: learn: 0.3762086 total: 1.36s remaining: 2.11s 393: learn: 0.3758127 total: 1.37s remaining: 2.1s 394: learn: 0.3752773 total: 1.37s remaining: 2.1s 395: learn: 0.3748929 total: 1.37s remaining: 2.1s 396: learn: 0.3744049 total: 1.38s remaining: 2.09s 397: learn: 0.3742496 total: 1.38s remaining: 2.09s 398: learn: 0.3737786 total: 1.39s remaining: 2.09s 399: learn: 0.3734626 total: 1.39s remaining: 2.08s 400: learn: 0.3730718 total: 1.39s remaining: 2.08s 401: learn: 0.3724141 total: 1.39s remaining: 2.07s 402: learn: 0.3719930 total: 1.4s remaining: 2.07s 403: learn: 0.3716086 total: 1.4s remaining: 2.07s 404: learn: 0.3709842 total: 1.4s remaining: 2.06s 405: learn: 0.3704663 total: 1.41s remaining: 2.06s 406: learn: 0.3699775 total: 1.41s remaining: 2.06s 407: learn: 0.3698619 total: 1.41s remaining: 2.05s 408: learn: 0.3694625 total: 1.42s remaining: 2.05s 409: learn: 0.3688800 total: 1.42s remaining: 2.04s 410: learn: 0.3687014 total: 1.42s remaining: 2.04s 411: learn: 0.3682739 total: 1.43s remaining: 2.04s 412: learn: 0.3678543 total: 1.43s remaining: 2.03s 413: learn: 0.3672088 total: 1.43s remaining: 2.03s 414: learn: 0.3667914 total: 1.44s remaining: 2.02s 415: learn: 0.3666853 total: 1.44s remaining: 2.02s 416: learn: 0.3664491 total: 1.44s remaining: 2.02s 417: learn: 0.3658085 total: 1.45s remaining: 2.01s 418: learn: 0.3655759 total: 1.45s remaining: 2.01s 419: learn: 0.3650955 total: 1.45s remaining: 2.01s 420: learn: 0.3646143 total: 1.46s remaining: 2s 421: learn: 0.3643238 total: 1.46s remaining: 2s 422: learn: 0.3640482 total: 1.46s remaining: 2s 423: learn: 0.3638433 total: 1.47s remaining: 1.99s 424: learn: 0.3634363 total: 1.47s remaining: 1.99s 425: 
learn: 0.3627563 total: 1.47s remaining: 1.98s 426: learn: 0.3620870 total: 1.48s remaining: 1.98s 427: learn: 0.3615678 total: 1.48s remaining: 1.98s 428: learn: 0.3608775 total: 1.48s remaining: 1.97s 429: learn: 0.3605767 total: 1.48s remaining: 1.97s 430: learn: 0.3600947 total: 1.49s remaining: 1.96s 431: learn: 0.3599022 total: 1.49s remaining: 1.96s 432: learn: 0.3592169 total: 1.49s remaining: 1.96s 433: learn: 0.3586974 total: 1.5s remaining: 1.95s 434: learn: 0.3583809 total: 1.5s remaining: 1.95s 435: learn: 0.3580570 total: 1.5s remaining: 1.95s 436: learn: 0.3579281 total: 1.51s remaining: 1.94s 437: learn: 0.3572908 total: 1.51s remaining: 1.94s 438: learn: 0.3567620 total: 1.51s remaining: 1.93s 439: learn: 0.3563369 total: 1.52s remaining: 1.93s 440: learn: 0.3559366 total: 1.52s remaining: 1.93s 441: learn: 0.3555465 total: 1.52s remaining: 1.92s 442: learn: 0.3551928 total: 1.53s remaining: 1.92s 443: learn: 0.3546176 total: 1.53s remaining: 1.92s 444: learn: 0.3542245 total: 1.53s remaining: 1.91s 445: learn: 0.3537539 total: 1.54s remaining: 1.91s 446: learn: 0.3533939 total: 1.54s remaining: 1.91s 447: learn: 0.3530292 total: 1.54s remaining: 1.9s 448: learn: 0.3527625 total: 1.55s remaining: 1.9s 449: learn: 0.3523223 total: 1.55s remaining: 1.89s 450: learn: 0.3522093 total: 1.55s remaining: 1.89s 451: learn: 0.3515991 total: 1.56s remaining: 1.89s 452: learn: 0.3513537 total: 1.56s remaining: 1.88s 453: learn: 0.3508370 total: 1.56s remaining: 1.88s 454: learn: 0.3505712 total: 1.57s remaining: 1.88s 455: learn: 0.3500935 total: 1.57s remaining: 1.87s 456: learn: 0.3497203 total: 1.57s remaining: 1.87s 457: learn: 0.3493516 total: 1.58s remaining: 1.87s 458: learn: 0.3493006 total: 1.58s remaining: 1.86s 459: learn: 0.3492219 total: 1.58s remaining: 1.86s 460: learn: 0.3486440 total: 1.59s remaining: 1.86s 461: learn: 0.3481056 total: 1.59s remaining: 1.85s 462: learn: 0.3476578 total: 1.59s remaining: 1.85s 463: learn: 0.3472720 total: 1.6s 
remaining: 1.85s 464: learn: 0.3469809 total: 1.6s remaining: 1.84s 465: learn: 0.3464662 total: 1.6s remaining: 1.84s 466: learn: 0.3461573 total: 1.61s remaining: 1.83s 467: learn: 0.3455262 total: 1.61s remaining: 1.83s 468: learn: 0.3450315 total: 1.61s remaining: 1.83s 469: learn: 0.3447621 total: 1.62s remaining: 1.82s 470: learn: 0.3443707 total: 1.62s remaining: 1.82s 471: learn: 0.3440078 total: 1.62s remaining: 1.82s 472: learn: 0.3436588 total: 1.63s remaining: 1.81s 473: learn: 0.3434317 total: 1.63s remaining: 1.81s 474: learn: 0.3429587 total: 1.63s remaining: 1.81s 475: learn: 0.3427129 total: 1.64s remaining: 1.8s 476: learn: 0.3426614 total: 1.64s remaining: 1.8s 477: learn: 0.3424276 total: 1.65s remaining: 1.8s 478: learn: 0.3419850 total: 1.65s remaining: 1.79s 479: learn: 0.3414828 total: 1.65s remaining: 1.79s 480: learn: 0.3414148 total: 1.65s remaining: 1.78s 481: learn: 0.3411039 total: 1.66s remaining: 1.78s 482: learn: 0.3404094 total: 1.66s remaining: 1.78s 483: learn: 0.3400870 total: 1.66s remaining: 1.77s 484: learn: 0.3397870 total: 1.67s remaining: 1.77s 485: learn: 0.3395923 total: 1.67s remaining: 1.77s 486: learn: 0.3393283 total: 1.68s remaining: 1.76s 487: learn: 0.3390542 total: 1.68s remaining: 1.76s 488: learn: 0.3387735 total: 1.68s remaining: 1.76s 489: learn: 0.3384992 total: 1.69s remaining: 1.75s 490: learn: 0.3382679 total: 1.69s remaining: 1.75s 491: learn: 0.3379591 total: 1.69s remaining: 1.75s 492: learn: 0.3375240 total: 1.7s remaining: 1.74s 493: learn: 0.3371663 total: 1.7s remaining: 1.74s 494: learn: 0.3368073 total: 1.7s remaining: 1.74s 495: learn: 0.3364258 total: 1.71s remaining: 1.73s 496: learn: 0.3357624 total: 1.71s remaining: 1.73s 497: learn: 0.3353391 total: 1.71s remaining: 1.73s 498: learn: 0.3351359 total: 1.72s remaining: 1.72s 499: learn: 0.3347721 total: 1.72s remaining: 1.72s 500: learn: 0.3344689 total: 1.72s remaining: 1.72s 501: learn: 0.3338885 total: 1.73s remaining: 1.71s 502: learn: 
0.3335304 total: 1.73s remaining: 1.71s 503: learn: 0.3333156 total: 1.73s remaining: 1.7s 504: learn: 0.3326858 total: 1.74s remaining: 1.7s 505: learn: 0.3322794 total: 1.74s remaining: 1.7s 506: learn: 0.3317957 total: 1.74s remaining: 1.69s 507: learn: 0.3315374 total: 1.75s remaining: 1.69s 508: learn: 0.3310346 total: 1.75s remaining: 1.69s 509: learn: 0.3308218 total: 1.75s remaining: 1.68s 510: learn: 0.3305515 total: 1.76s remaining: 1.68s 511: learn: 0.3303324 total: 1.76s remaining: 1.68s 512: learn: 0.3298027 total: 1.76s remaining: 1.67s 513: learn: 0.3295265 total: 1.77s remaining: 1.67s 514: learn: 0.3292161 total: 1.77s remaining: 1.67s 515: learn: 0.3289082 total: 1.77s remaining: 1.67s 516: learn: 0.3284967 total: 1.78s remaining: 1.66s 517: learn: 0.3280931 total: 1.78s remaining: 1.66s 518: learn: 0.3277610 total: 1.79s remaining: 1.66s 519: learn: 0.3273487 total: 1.79s remaining: 1.65s 520: learn: 0.3270085 total: 1.79s remaining: 1.65s 521: learn: 0.3267775 total: 1.8s remaining: 1.65s 522: learn: 0.3265932 total: 1.8s remaining: 1.64s 523: learn: 0.3262778 total: 1.8s remaining: 1.64s 524: learn: 0.3256224 total: 1.81s remaining: 1.64s 525: learn: 0.3253104 total: 1.81s remaining: 1.63s 526: learn: 0.3252620 total: 1.81s remaining: 1.63s 527: learn: 0.3248235 total: 1.82s remaining: 1.63s 528: learn: 0.3246449 total: 1.82s remaining: 1.62s 529: learn: 0.3241613 total: 1.82s remaining: 1.62s 530: learn: 0.3239438 total: 1.83s remaining: 1.61s 531: learn: 0.3235460 total: 1.83s remaining: 1.61s 532: learn: 0.3228695 total: 1.84s remaining: 1.61s 533: learn: 0.3226564 total: 1.84s remaining: 1.6s 534: learn: 0.3223583 total: 1.84s remaining: 1.6s 535: learn: 0.3221698 total: 1.85s remaining: 1.6s 536: learn: 0.3216386 total: 1.85s remaining: 1.59s 537: learn: 0.3208786 total: 1.85s remaining: 1.59s 538: learn: 0.3206405 total: 1.86s remaining: 1.59s 539: learn: 0.3200470 total: 1.86s remaining: 1.58s 540: learn: 0.3194599 total: 1.86s 
remaining: 1.58s 541: learn: 0.3191917 total: 1.87s remaining: 1.58s 542: learn: 0.3187413 total: 1.87s remaining: 1.57s 543: learn: 0.3183845 total: 1.87s remaining: 1.57s 544: learn: 0.3181347 total: 1.88s remaining: 1.57s 545: learn: 0.3179402 total: 1.88s remaining: 1.56s 546: learn: 0.3176041 total: 1.88s remaining: 1.56s 547: learn: 0.3171493 total: 1.89s remaining: 1.56s 548: learn: 0.3167678 total: 1.89s remaining: 1.55s 549: learn: 0.3161866 total: 1.89s remaining: 1.55s 550: learn: 0.3158943 total: 1.9s remaining: 1.54s 551: learn: 0.3155500 total: 1.9s remaining: 1.54s 552: learn: 0.3151446 total: 1.9s remaining: 1.54s 553: learn: 0.3150676 total: 1.91s remaining: 1.53s 554: learn: 0.3148019 total: 1.91s remaining: 1.53s 555: learn: 0.3146087 total: 1.91s remaining: 1.53s 556: learn: 0.3145459 total: 1.92s remaining: 1.52s 557: learn: 0.3144546 total: 1.92s remaining: 1.52s 558: learn: 0.3140608 total: 1.92s remaining: 1.52s 559: learn: 0.3137361 total: 1.93s remaining: 1.51s 560: learn: 0.3134164 total: 1.93s remaining: 1.51s 561: learn: 0.3133328 total: 1.93s remaining: 1.51s 562: learn: 0.3129987 total: 1.94s remaining: 1.5s 563: learn: 0.3127488 total: 1.94s remaining: 1.5s 564: learn: 0.3124070 total: 1.94s remaining: 1.5s 565: learn: 0.3120479 total: 1.95s remaining: 1.49s 566: learn: 0.3118329 total: 1.95s remaining: 1.49s 567: learn: 0.3115109 total: 1.95s remaining: 1.49s 568: learn: 0.3112201 total: 1.96s remaining: 1.48s 569: learn: 0.3108064 total: 1.96s remaining: 1.48s 570: learn: 0.3105657 total: 1.96s remaining: 1.47s 571: learn: 0.3102145 total: 1.97s remaining: 1.47s 572: learn: 0.3097385 total: 1.97s remaining: 1.47s 573: learn: 0.3093835 total: 1.97s remaining: 1.47s 574: learn: 0.3090351 total: 1.98s remaining: 1.46s 575: learn: 0.3087266 total: 1.98s remaining: 1.46s 576: learn: 0.3085897 total: 1.98s remaining: 1.45s 577: learn: 0.3083184 total: 1.99s remaining: 1.45s 578: learn: 0.3082340 total: 1.99s remaining: 1.45s 579: learn: 
0.3076160 total: 1.99s remaining: 1.44s 580: learn: 0.3073862 total: 2s remaining: 1.44s 581: learn: 0.3070794 total: 2s remaining: 1.44s 582: learn: 0.3069234 total: 2s remaining: 1.43s 583: learn: 0.3068346 total: 2.01s remaining: 1.43s 584: learn: 0.3065929 total: 2.01s remaining: 1.43s 585: learn: 0.3063195 total: 2.01s remaining: 1.42s 586: learn: 0.3059346 total: 2.02s remaining: 1.42s 587: learn: 0.3056757 total: 2.02s remaining: 1.42s 588: learn: 0.3053577 total: 2.02s remaining: 1.41s 589: learn: 0.3049893 total: 2.03s remaining: 1.41s 590: learn: 0.3047812 total: 2.03s remaining: 1.4s 591: learn: 0.3046023 total: 2.03s remaining: 1.4s 592: learn: 0.3042608 total: 2.04s remaining: 1.4s 593: learn: 0.3039681 total: 2.04s remaining: 1.39s 594: learn: 0.3035661 total: 2.04s remaining: 1.39s 595: learn: 0.3033736 total: 2.05s remaining: 1.39s 596: learn: 0.3029874 total: 2.05s remaining: 1.38s 597: learn: 0.3023723 total: 2.05s remaining: 1.38s 598: learn: 0.3020138 total: 2.06s remaining: 1.38s 599: learn: 0.3017719 total: 2.06s remaining: 1.37s 600: learn: 0.3015049 total: 2.06s remaining: 1.37s 601: learn: 0.3011821 total: 2.06s remaining: 1.36s 602: learn: 0.3009602 total: 2.07s remaining: 1.36s 603: learn: 0.3005602 total: 2.07s remaining: 1.36s 604: learn: 0.3004219 total: 2.08s remaining: 1.35s 605: learn: 0.3002263 total: 2.08s remaining: 1.35s 606: learn: 0.2999867 total: 2.08s remaining: 1.35s 607: learn: 0.2994174 total: 2.08s remaining: 1.34s 608: learn: 0.2991720 total: 2.09s remaining: 1.34s 609: learn: 0.2987068 total: 2.09s remaining: 1.34s 610: learn: 0.2985418 total: 2.09s remaining: 1.33s 611: learn: 0.2981738 total: 2.1s remaining: 1.33s 612: learn: 0.2979976 total: 2.1s remaining: 1.33s 613: learn: 0.2976747 total: 2.1s remaining: 1.32s 614: learn: 0.2973307 total: 2.11s remaining: 1.32s 615: learn: 0.2970624 total: 2.11s remaining: 1.31s 616: learn: 0.2967306 total: 2.11s remaining: 1.31s 617: learn: 0.2964076 total: 2.12s remaining: 
1.31s 618: learn: 0.2962058 total: 2.12s remaining: 1.3s 619: learn: 0.2957920 total: 2.12s remaining: 1.3s 620: learn: 0.2954920 total: 2.13s remaining: 1.3s 621: learn: 0.2950938 total: 2.13s remaining: 1.29s 622: learn: 0.2948042 total: 2.13s remaining: 1.29s 623: learn: 0.2943325 total: 2.13s remaining: 1.29s 624: learn: 0.2940090 total: 2.14s remaining: 1.28s 625: learn: 0.2934846 total: 2.14s remaining: 1.28s 626: learn: 0.2930588 total: 2.15s remaining: 1.28s 627: learn: 0.2928589 total: 2.15s remaining: 1.27s 628: learn: 0.2924964 total: 2.15s remaining: 1.27s 629: learn: 0.2924495 total: 2.15s remaining: 1.26s 630: learn: 0.2920443 total: 2.16s remaining: 1.26s 631: learn: 0.2916802 total: 2.16s remaining: 1.26s 632: learn: 0.2914135 total: 2.17s remaining: 1.25s 633: learn: 0.2912321 total: 2.17s remaining: 1.25s 634: learn: 0.2909848 total: 2.17s remaining: 1.25s 635: learn: 0.2904671 total: 2.17s remaining: 1.25s 636: learn: 0.2902028 total: 2.18s remaining: 1.24s 637: learn: 0.2898836 total: 2.18s remaining: 1.24s 638: learn: 0.2895101 total: 2.19s remaining: 1.23s 639: learn: 0.2892208 total: 2.19s remaining: 1.23s 640: learn: 0.2891015 total: 2.19s remaining: 1.23s 641: learn: 0.2888759 total: 2.19s remaining: 1.22s 642: learn: 0.2887283 total: 2.2s remaining: 1.22s 643: learn: 0.2885609 total: 2.2s remaining: 1.22s 644: learn: 0.2881727 total: 2.2s remaining: 1.21s 645: learn: 0.2877596 total: 2.21s remaining: 1.21s 646: learn: 0.2874737 total: 2.21s remaining: 1.21s 647: learn: 0.2870647 total: 2.21s remaining: 1.2s 648: learn: 0.2865679 total: 2.22s remaining: 1.2s 649: learn: 0.2863326 total: 2.22s remaining: 1.2s 650: learn: 0.2861018 total: 2.22s remaining: 1.19s 651: learn: 0.2857576 total: 2.23s remaining: 1.19s 652: learn: 0.2853869 total: 2.23s remaining: 1.18s 653: learn: 0.2851109 total: 2.23s remaining: 1.18s 654: learn: 0.2848755 total: 2.23s remaining: 1.18s 655: learn: 0.2847261 total: 2.24s remaining: 1.17s 656: learn: 0.2844077 
total: 2.24s remaining: 1.17s 657: learn: 0.2840529 total: 2.25s remaining: 1.17s 658: learn: 0.2838391 total: 2.25s remaining: 1.16s 659: learn: 0.2833714 total: 2.25s remaining: 1.16s 660: learn: 0.2831854 total: 2.25s remaining: 1.16s 661: learn: 0.2831066 total: 2.26s remaining: 1.15s 662: learn: 0.2827877 total: 2.26s remaining: 1.15s 663: learn: 0.2825169 total: 2.26s remaining: 1.15s 664: learn: 0.2819040 total: 2.27s remaining: 1.14s 665: learn: 0.2815437 total: 2.27s remaining: 1.14s 666: learn: 0.2812914 total: 2.27s remaining: 1.14s 667: learn: 0.2809317 total: 2.28s remaining: 1.13s 668: learn: 0.2805022 total: 2.28s remaining: 1.13s 669: learn: 0.2802108 total: 2.28s remaining: 1.12s 670: learn: 0.2800260 total: 2.29s remaining: 1.12s 671: learn: 0.2796513 total: 2.29s remaining: 1.12s 672: learn: 0.2793144 total: 2.29s remaining: 1.11s 673: learn: 0.2787577 total: 2.3s remaining: 1.11s 674: learn: 0.2786898 total: 2.3s remaining: 1.11s 675: learn: 0.2783373 total: 2.3s remaining: 1.1s 676: learn: 0.2780719 total: 2.31s remaining: 1.1s 677: learn: 0.2778878 total: 2.31s remaining: 1.1s 678: learn: 0.2775800 total: 2.31s remaining: 1.09s 679: learn: 0.2774185 total: 2.31s remaining: 1.09s 680: learn: 0.2773129 total: 2.32s remaining: 1.08s 681: learn: 0.2770925 total: 2.32s remaining: 1.08s 682: learn: 0.2768828 total: 2.32s remaining: 1.08s 683: learn: 0.2768476 total: 2.33s remaining: 1.07s 684: learn: 0.2767953 total: 2.33s remaining: 1.07s 685: learn: 0.2764390 total: 2.33s remaining: 1.07s 686: learn: 0.2762438 total: 2.34s remaining: 1.06s 687: learn: 0.2758780 total: 2.34s remaining: 1.06s 688: learn: 0.2755027 total: 2.34s remaining: 1.06s 689: learn: 0.2751770 total: 2.35s remaining: 1.05s 690: learn: 0.2747903 total: 2.35s remaining: 1.05s 691: learn: 0.2744635 total: 2.35s remaining: 1.05s 692: learn: 0.2740074 total: 2.36s remaining: 1.04s 693: learn: 0.2739810 total: 2.36s remaining: 1.04s 694: learn: 0.2736186 total: 2.36s remaining: 1.04s 
695: learn: 0.2734820 total: 2.37s remaining: 1.03s 696: learn: 0.2732142 total: 2.37s remaining: 1.03s 697: learn: 0.2729981 total: 2.37s remaining: 1.03s 698: learn: 0.2728261 total: 2.38s remaining: 1.02s 699: learn: 0.2725844 total: 2.38s remaining: 1.02s 700: learn: 0.2721828 total: 2.38s remaining: 1.01s 701: learn: 0.2719829 total: 2.38s remaining: 1.01s 702: learn: 0.2719228 total: 2.39s remaining: 1.01s 703: learn: 0.2716038 total: 2.39s remaining: 1s 704: learn: 0.2714065 total: 2.39s remaining: 1s 705: learn: 0.2710144 total: 2.4s remaining: 998ms 706: learn: 0.2709227 total: 2.4s remaining: 995ms 707: learn: 0.2707329 total: 2.4s remaining: 991ms 708: learn: 0.2704630 total: 2.41s remaining: 988ms 709: learn: 0.2702448 total: 2.41s remaining: 984ms 710: learn: 0.2700055 total: 2.41s remaining: 981ms 711: learn: 0.2694917 total: 2.42s remaining: 977ms 712: learn: 0.2693000 total: 2.42s remaining: 974ms 713: learn: 0.2690283 total: 2.42s remaining: 970ms 714: learn: 0.2687780 total: 2.42s remaining: 967ms 715: learn: 0.2687384 total: 2.43s remaining: 963ms 716: learn: 0.2685078 total: 2.43s remaining: 960ms 717: learn: 0.2681714 total: 2.43s remaining: 956ms 718: learn: 0.2679557 total: 2.44s remaining: 953ms 719: learn: 0.2678159 total: 2.44s remaining: 949ms 720: learn: 0.2676483 total: 2.44s remaining: 946ms 721: learn: 0.2674793 total: 2.45s remaining: 942ms 722: learn: 0.2669124 total: 2.45s remaining: 939ms 723: learn: 0.2666120 total: 2.45s remaining: 935ms 724: learn: 0.2665815 total: 2.46s remaining: 932ms 725: learn: 0.2662691 total: 2.46s remaining: 928ms 726: learn: 0.2661253 total: 2.46s remaining: 925ms 727: learn: 0.2659665 total: 2.47s remaining: 922ms 728: learn: 0.2656264 total: 2.47s remaining: 918ms 729: learn: 0.2654130 total: 2.47s remaining: 915ms 730: learn: 0.2652073 total: 2.48s remaining: 911ms 731: learn: 0.2649612 total: 2.48s remaining: 908ms 732: learn: 0.2648599 total: 2.48s remaining: 905ms 733: learn: 0.2646590 total: 
2.49s remaining: 901ms 734: learn: 0.2644626 total: 2.49s remaining: 898ms 735: learn: 0.2641833 total: 2.49s remaining: 894ms 736: learn: 0.2639876 total: 2.5s remaining: 891ms 737: learn: 0.2637405 total: 2.5s remaining: 887ms 738: learn: 0.2636841 total: 2.5s remaining: 884ms 739: learn: 0.2632669 total: 2.51s remaining: 881ms 740: learn: 0.2629340 total: 2.51s remaining: 877ms 741: learn: 0.2624565 total: 2.51s remaining: 874ms 742: learn: 0.2621763 total: 2.52s remaining: 871ms 743: learn: 0.2620465 total: 2.52s remaining: 867ms 744: learn: 0.2617521 total: 2.52s remaining: 864ms 745: learn: 0.2616214 total: 2.53s remaining: 860ms 746: learn: 0.2613156 total: 2.53s remaining: 857ms 747: learn: 0.2610295 total: 2.53s remaining: 854ms 748: learn: 0.2605249 total: 2.54s remaining: 850ms 749: learn: 0.2602300 total: 2.54s remaining: 847ms 750: learn: 0.2598565 total: 2.54s remaining: 843ms 751: learn: 0.2595145 total: 2.55s remaining: 840ms 752: learn: 0.2590396 total: 2.55s remaining: 837ms 753: learn: 0.2586583 total: 2.55s remaining: 833ms 754: learn: 0.2583753 total: 2.56s remaining: 830ms 755: learn: 0.2581020 total: 2.56s remaining: 827ms 756: learn: 0.2576964 total: 2.56s remaining: 823ms 757: learn: 0.2576649 total: 2.57s remaining: 820ms 758: learn: 0.2572911 total: 2.57s remaining: 816ms 759: learn: 0.2572575 total: 2.57s remaining: 813ms 760: learn: 0.2569378 total: 2.58s remaining: 810ms 761: learn: 0.2567645 total: 2.58s remaining: 806ms 762: learn: 0.2565097 total: 2.58s remaining: 803ms 763: learn: 0.2560526 total: 2.59s remaining: 799ms 764: learn: 0.2560135 total: 2.59s remaining: 796ms 765: learn: 0.2555596 total: 2.59s remaining: 792ms 766: learn: 0.2553998 total: 2.6s remaining: 789ms 767: learn: 0.2551371 total: 2.6s remaining: 786ms 768: learn: 0.2550625 total: 2.6s remaining: 782ms 769: learn: 0.2547166 total: 2.61s remaining: 779ms 770: learn: 0.2545018 total: 2.61s remaining: 775ms 771: learn: 0.2541630 total: 2.61s remaining: 772ms 772: 
learn: 0.2537533 total: 2.62s remaining: 768ms 773: learn: 0.2536258 total: 2.62s remaining: 765ms 774: learn: 0.2536134 total: 2.62s remaining: 762ms 775: learn: 0.2535560 total: 2.63s remaining: 758ms 776: learn: 0.2533754 total: 2.63s remaining: 755ms 777: learn: 0.2529591 total: 2.63s remaining: 751ms 778: learn: 0.2527568 total: 2.64s remaining: 748ms 779: learn: 0.2523681 total: 2.64s remaining: 744ms 780: learn: 0.2519061 total: 2.64s remaining: 741ms 781: learn: 0.2516554 total: 2.65s remaining: 738ms 782: learn: 0.2516155 total: 2.65s remaining: 734ms 783: learn: 0.2514085 total: 2.65s remaining: 731ms 784: learn: 0.2512819 total: 2.65s remaining: 727ms 785: learn: 0.2511030 total: 2.66s remaining: 724ms 786: learn: 0.2508719 total: 2.66s remaining: 721ms 787: learn: 0.2507493 total: 2.67s remaining: 717ms 788: learn: 0.2506815 total: 2.67s remaining: 714ms 789: learn: 0.2506345 total: 2.67s remaining: 710ms 790: learn: 0.2503126 total: 2.67s remaining: 707ms 791: learn: 0.2500426 total: 2.68s remaining: 703ms 792: learn: 0.2498370 total: 2.68s remaining: 700ms 793: learn: 0.2496517 total: 2.68s remaining: 696ms 794: learn: 0.2492671 total: 2.69s remaining: 693ms 795: learn: 0.2490305 total: 2.69s remaining: 690ms 796: learn: 0.2487405 total: 2.69s remaining: 686ms 797: learn: 0.2485630 total: 2.7s remaining: 683ms 798: learn: 0.2485411 total: 2.7s remaining: 679ms 799: learn: 0.2482743 total: 2.7s remaining: 676ms 800: learn: 0.2479895 total: 2.71s remaining: 672ms 801: learn: 0.2476468 total: 2.71s remaining: 669ms 802: learn: 0.2476272 total: 2.71s remaining: 666ms 803: learn: 0.2474233 total: 2.72s remaining: 662ms 804: learn: 0.2471691 total: 2.72s remaining: 659ms 805: learn: 0.2470093 total: 2.72s remaining: 656ms 806: learn: 0.2466640 total: 2.73s remaining: 653ms 807: learn: 0.2464114 total: 2.74s remaining: 650ms 808: learn: 0.2462497 total: 2.74s remaining: 648ms 809: learn: 0.2458715 total: 2.75s remaining: 644ms 810: learn: 0.2455936 total: 
2.75s remaining: 641ms 811: learn: 0.2454435 total: 2.76s remaining: 638ms 812: learn: 0.2452169 total: 2.76s remaining: 635ms 813: learn: 0.2451734 total: 2.76s remaining: 631ms 814: learn: 0.2448680 total: 2.77s remaining: 628ms 815: learn: 0.2448261 total: 2.77s remaining: 625ms 816: learn: 0.2443975 total: 2.77s remaining: 621ms 817: learn: 0.2443746 total: 2.78s remaining: 618ms 818: learn: 0.2440732 total: 2.78s remaining: 614ms 819: learn: 0.2439608 total: 2.78s remaining: 611ms 820: learn: 0.2438886 total: 2.79s remaining: 607ms 821: learn: 0.2437090 total: 2.79s remaining: 604ms 822: learn: 0.2436873 total: 2.79s remaining: 601ms 823: learn: 0.2434946 total: 2.8s remaining: 597ms 824: learn: 0.2431038 total: 2.8s remaining: 594ms 825: learn: 0.2428742 total: 2.8s remaining: 590ms 826: learn: 0.2425255 total: 2.81s remaining: 587ms 827: learn: 0.2423590 total: 2.81s remaining: 584ms 828: learn: 0.2418667 total: 2.81s remaining: 580ms 829: learn: 0.2415327 total: 2.81s remaining: 577ms 830: learn: 0.2412411 total: 2.82s remaining: 573ms 831: learn: 0.2408410 total: 2.82s remaining: 570ms 832: learn: 0.2407998 total: 2.83s remaining: 566ms 833: learn: 0.2406402 total: 2.83s remaining: 563ms 834: learn: 0.2406208 total: 2.83s remaining: 560ms 835: learn: 0.2405666 total: 2.83s remaining: 556ms 836: learn: 0.2403029 total: 2.84s remaining: 553ms 837: learn: 0.2401250 total: 2.84s remaining: 549ms 838: learn: 0.2401027 total: 2.84s remaining: 546ms 839: learn: 0.2398424 total: 2.85s remaining: 542ms 840: learn: 0.2398274 total: 2.85s remaining: 539ms 841: learn: 0.2395215 total: 2.85s remaining: 536ms 842: learn: 0.2392416 total: 2.86s remaining: 532ms 843: learn: 0.2390250 total: 2.86s remaining: 529ms 844: learn: 0.2388586 total: 2.86s remaining: 525ms 845: learn: 0.2386279 total: 2.87s remaining: 522ms 846: learn: 0.2384384 total: 2.87s remaining: 518ms 847: learn: 0.2383070 total: 2.87s remaining: 515ms 848: learn: 0.2381507 total: 2.88s remaining: 511ms 
849: learn: 0.2379354 total: 2.88s remaining: 508ms 850: learn: 0.2376864 total: 2.88s remaining: 505ms 851: learn: 0.2373329 total: 2.88s remaining: 501ms 852: learn: 0.2372335 total: 2.89s remaining: 498ms 853: learn: 0.2367554 total: 2.89s remaining: 494ms 854: learn: 0.2365606 total: 2.89s remaining: 491ms 855: learn: 0.2365435 total: 2.9s remaining: 488ms 856: learn: 0.2362393 total: 2.9s remaining: 484ms 857: learn: 0.2361009 total: 2.9s remaining: 481ms 858: learn: 0.2360136 total: 2.91s remaining: 477ms 859: learn: 0.2357448 total: 2.91s remaining: 474ms 860: learn: 0.2354335 total: 2.91s remaining: 470ms 861: learn: 0.2353438 total: 2.92s remaining: 467ms 862: learn: 0.2353260 total: 2.92s remaining: 463ms 863: learn: 0.2348839 total: 2.92s remaining: 460ms 864: learn: 0.2347374 total: 2.93s remaining: 457ms 865: learn: 0.2345540 total: 2.93s remaining: 453ms 866: learn: 0.2343686 total: 2.93s remaining: 450ms 867: learn: 0.2341852 total: 2.94s remaining: 446ms 868: learn: 0.2339969 total: 2.94s remaining: 443ms 869: learn: 0.2339190 total: 2.94s remaining: 440ms 870: learn: 0.2337132 total: 2.94s remaining: 436ms 871: learn: 0.2333663 total: 2.95s remaining: 433ms 872: learn: 0.2331696 total: 2.95s remaining: 429ms 873: learn: 0.2329565 total: 2.96s remaining: 426ms 874: learn: 0.2326612 total: 2.96s remaining: 423ms 875: learn: 0.2323790 total: 2.96s remaining: 419ms 876: learn: 0.2322076 total: 2.96s remaining: 416ms 877: learn: 0.2319440 total: 2.97s remaining: 413ms 878: learn: 0.2317660 total: 2.97s remaining: 409ms 879: learn: 0.2317280 total: 2.98s remaining: 406ms 880: learn: 0.2317000 total: 2.98s remaining: 402ms 881: learn: 0.2315801 total: 2.98s remaining: 399ms 882: learn: 0.2315676 total: 2.98s remaining: 395ms 883: learn: 0.2314147 total: 2.99s remaining: 392ms 884: learn: 0.2312086 total: 2.99s remaining: 389ms 885: learn: 0.2308071 total: 2.99s remaining: 385ms 886: learn: 0.2304809 total: 3s remaining: 382ms 887: learn: 0.2302192 total: 
3s remaining: 378ms 888: learn: 0.2299117 total: 3s remaining: 375ms 889: learn: 0.2297213 total: 3.01s remaining: 372ms 890: learn: 0.2296087 total: 3.01s remaining: 368ms 891: learn: 0.2294252 total: 3.01s remaining: 365ms 892: learn: 0.2292066 total: 3.02s remaining: 361ms 893: learn: 0.2288881 total: 3.02s remaining: 358ms 894: learn: 0.2286894 total: 3.02s remaining: 355ms 895: learn: 0.2284812 total: 3.03s remaining: 351ms 896: learn: 0.2281809 total: 3.03s remaining: 348ms 897: learn: 0.2279510 total: 3.03s remaining: 345ms 898: learn: 0.2279389 total: 3.04s remaining: 341ms 899: learn: 0.2277300 total: 3.04s remaining: 338ms 900: learn: 0.2274465 total: 3.04s remaining: 334ms 901: learn: 0.2274025 total: 3.04s remaining: 331ms 902: learn: 0.2272341 total: 3.05s remaining: 327ms 903: learn: 0.2272163 total: 3.05s remaining: 324ms 904: learn: 0.2270345 total: 3.06s remaining: 321ms 905: learn: 0.2270112 total: 3.06s remaining: 317ms 906: learn: 0.2267261 total: 3.06s remaining: 314ms 907: learn: 0.2265868 total: 3.06s remaining: 310ms 908: learn: 0.2262635 total: 3.07s remaining: 307ms 909: learn: 0.2261474 total: 3.07s remaining: 304ms 910: learn: 0.2261271 total: 3.07s remaining: 300ms 911: learn: 0.2259786 total: 3.08s remaining: 297ms 912: learn: 0.2257185 total: 3.08s remaining: 293ms 913: learn: 0.2255092 total: 3.08s remaining: 290ms 914: learn: 0.2253473 total: 3.08s remaining: 287ms 915: learn: 0.2252805 total: 3.09s remaining: 283ms 916: learn: 0.2252673 total: 3.09s remaining: 280ms 917: learn: 0.2250309 total: 3.09s remaining: 276ms 918: learn: 0.2250015 total: 3.1s remaining: 273ms 919: learn: 0.2249589 total: 3.1s remaining: 270ms 920: learn: 0.2248454 total: 3.1s remaining: 266ms 921: learn: 0.2246542 total: 3.11s remaining: 263ms 922: learn: 0.2243981 total: 3.11s remaining: 259ms 923: learn: 0.2242925 total: 3.11s remaining: 256ms 924: learn: 0.2240367 total: 3.12s remaining: 253ms 925: learn: 0.2236834 total: 3.12s remaining: 249ms 926: 
learn: 0.2234257 total: 3.12s remaining: 246ms 927: learn: 0.2230992 total: 3.13s remaining: 243ms 928: learn: 0.2230899 total: 3.13s remaining: 239ms 929: learn: 0.2228504 total: 3.13s remaining: 236ms 930: learn: 0.2227112 total: 3.14s remaining: 232ms 931: learn: 0.2224408 total: 3.14s remaining: 229ms 932: learn: 0.2222795 total: 3.14s remaining: 226ms 933: learn: 0.2220837 total: 3.15s remaining: 222ms 934: learn: 0.2218455 total: 3.15s remaining: 219ms 935: learn: 0.2216053 total: 3.15s remaining: 216ms 936: learn: 0.2212786 total: 3.16s remaining: 212ms 937: learn: 0.2210942 total: 3.16s remaining: 209ms 938: learn: 0.2207330 total: 3.16s remaining: 205ms 939: learn: 0.2204996 total: 3.17s remaining: 202ms 940: learn: 0.2201419 total: 3.17s remaining: 199ms 941: learn: 0.2197450 total: 3.17s remaining: 195ms 942: learn: 0.2195649 total: 3.17s remaining: 192ms 943: learn: 0.2194141 total: 3.18s remaining: 189ms 944: learn: 0.2192079 total: 3.18s remaining: 185ms 945: learn: 0.2189989 total: 3.19s remaining: 182ms 946: learn: 0.2187927 total: 3.19s remaining: 178ms 947: learn: 0.2186379 total: 3.19s remaining: 175ms 948: learn: 0.2184722 total: 3.19s remaining: 172ms 949: learn: 0.2183393 total: 3.2s remaining: 168ms 950: learn: 0.2181005 total: 3.2s remaining: 165ms 951: learn: 0.2179581 total: 3.2s remaining: 162ms 952: learn: 0.2177392 total: 3.21s remaining: 158ms 953: learn: 0.2174948 total: 3.21s remaining: 155ms 954: learn: 0.2173059 total: 3.21s remaining: 151ms 955: learn: 0.2171342 total: 3.22s remaining: 148ms 956: learn: 0.2169922 total: 3.22s remaining: 145ms 957: learn: 0.2167591 total: 3.22s remaining: 141ms 958: learn: 0.2164620 total: 3.23s remaining: 138ms 959: learn: 0.2161386 total: 3.23s remaining: 135ms 960: learn: 0.2161191 total: 3.23s remaining: 131ms 961: learn: 0.2161055 total: 3.24s remaining: 128ms 962: learn: 0.2158105 total: 3.24s remaining: 125ms 963: learn: 0.2154934 total: 3.24s remaining: 121ms 964: learn: 0.2153511 total: 
3.25s remaining: 118ms 965: learn: 0.2150633 total: 3.25s remaining: 114ms 966: learn: 0.2147549 total: 3.25s remaining: 111ms 967: learn: 0.2144673 total: 3.26s remaining: 108ms 968: learn: 0.2141893 total: 3.26s remaining: 104ms 969: learn: 0.2140516 total: 3.26s remaining: 101ms 970: learn: 0.2140342 total: 3.27s remaining: 97.6ms 971: learn: 0.2139076 total: 3.27s remaining: 94.2ms 972: learn: 0.2135383 total: 3.27s remaining: 90.8ms 973: learn: 0.2132737 total: 3.28s remaining: 87.5ms 974: learn: 0.2130593 total: 3.28s remaining: 84.1ms 975: learn: 0.2127680 total: 3.28s remaining: 80.7ms 976: learn: 0.2125970 total: 3.29s remaining: 77.4ms 977: learn: 0.2123460 total: 3.29s remaining: 74ms 978: learn: 0.2121052 total: 3.29s remaining: 70.6ms 979: learn: 0.2118347 total: 3.29s remaining: 67.3ms 980: learn: 0.2114822 total: 3.3s remaining: 63.9ms 981: learn: 0.2112691 total: 3.3s remaining: 60.5ms 982: learn: 0.2110438 total: 3.31s remaining: 57.2ms 983: learn: 0.2108138 total: 3.31s remaining: 53.8ms 984: learn: 0.2106651 total: 3.31s remaining: 50.4ms 985: learn: 0.2104341 total: 3.31s remaining: 47.1ms 986: learn: 0.2104232 total: 3.32s remaining: 43.7ms 987: learn: 0.2103155 total: 3.32s remaining: 40.3ms 988: learn: 0.2100220 total: 3.33s remaining: 37ms 989: learn: 0.2098433 total: 3.33s remaining: 33.6ms 990: learn: 0.2094910 total: 3.33s remaining: 30.3ms 991: learn: 0.2094807 total: 3.33s remaining: 26.9ms 992: learn: 0.2093171 total: 3.34s remaining: 23.5ms 993: learn: 0.2090154 total: 3.35s remaining: 20.2ms 994: learn: 0.2088564 total: 3.35s remaining: 16.9ms 995: learn: 0.2085709 total: 3.36s remaining: 13.5ms 996: learn: 0.2083527 total: 3.36s remaining: 10.1ms 997: learn: 0.2080872 total: 3.37s remaining: 6.75ms 998: learn: 0.2080622 total: 3.37s remaining: 3.37ms 999: learn: 0.2077081 total: 3.37s remaining: 0us
# Display the model-comparison table (MSE/MAE/R2/training time per model)
df_compare
| Model | Mean Squared Error | Mean Absolute Error | R2 Score | Training Time (s) | |
|---|---|---|---|---|---|
| 0 | Ridge Regression | 0.749019 | 0.601173 | 0.981691 | 0.004274 |
| 1 | Linear Regression | 0.767788 | 0.612101 | 0.981232 | 0.015218 |
| 2 | Lasso Regression | 1.101427 | 0.785124 | 0.973077 | 0.091215 |
| 3 | Elastic Net | 1.12101 | 0.749384 | 0.972598 | 0.099243 |
| 4 | AdaBoost | 88.4183 | 7.570231 | -1.161293 | 0.679208 |
| 5 | Gradient Boosting | 88.682287 | 7.57811 | -1.167746 | 2.148484 |
| 6 | Decision Tree | 91.197554 | 7.800773 | -1.229229 | 0.069291 |
| 7 | Random Forest | 92.656816 | 7.815599 | -1.264899 | 3.783108 |
| 11 | CatBoost | 94.177261 | 7.912422 | -1.302065 | 3.518485 |
| 8 | XGBoost | 103.064918 | 8.379896 | -1.519314 | 0.455947 |
| 9 | K-Neighbors Regressor | 330.959365 | 16.750109 | -7.089956 | 0.000565 |
| 10 | SVR | 349.436131 | 17.668843 | -7.541601 | 0.134831 |
# Train the linear regression model
# Baseline: ordinary least squares on the full standardized feature set.
lr_model_base = LinearRegression()
lr_model_base.fit(X_train_scaled, y_train)
# Make predictions on the scaled test set
lr_pred_base = lr_model_base.predict(X_test_scaled)
# Collect actuals vs. predictions for later comparison plots.
prediction_df = pd.DataFrame()
# Test period appears to be 2020 onward; the 'date' column inherits the
# filtered row index from df.
prediction_df['date'] = df[df.date.dt.year>=2020]['date']
# NOTE(review): this assignment relies on y_test sharing the same index as
# the 2020+ rows of df — confirm the train/test split preserved the index.
prediction_df['y_test'] = y_test
# lr_pred_base is a plain array, so it is assigned positionally.
prediction_df['lr_pred_base'] = lr_pred_base
prediction_df.head()
| date | y_test | lr_pred_base | |
|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 |
# Print and store MSE/RMSE/MAE/R2 for the baseline linear model
lr_score_base = evaluate_regression_model(y_test, lr_pred_base)
Mean Squared Error (MSE): 0.768 Root Mean Squared Error (RMSE): 0.876 Mean Absolute Error (MAE): 0.612 R-squared (R2): 0.981
# Show the stored metric dictionary for the baseline linear model
lr_score_base
{'MSE': 0.7677881743434356,
'RMSE': 0.8762352277462003,
'MAE': 0.612101458244357,
'R2': 0.9812322205481261}
# Visualize predicted-vs-actual accuracy and the prediction time series
# (project helper functions defined earlier in the notebook).
plot_regression_accuracy(y_test, lr_pred_base)
plot_predictions(df,lr_pred_base)
# Rank features by importance for the baseline model; keep the top 20
# for the feature-selection experiments below.
lr_base_feature_importance = plot_feature_importance(lr_model_base,X_train,20)
# Peek at the 15 strongest features
lr_base_feature_importance[:15]
| Feature | Importance | |
|---|---|---|
| 0 | adj_close_5d_avg | 34.506178 |
| 1 | sma_5 | 25.998560 |
| 2 | ema_9 | 21.885541 |
| 3 | adj_close_1d_ago | 9.666451 |
| 4 | adj_close_15d_avg | 8.627124 |
| 5 | close_5d_ago | 8.502948 |
| 6 | close_1d_ago | 7.337071 |
| 7 | adj_close_3d_avg | 7.122290 |
| 8 | low_5d_avg | 5.716135 |
| 9 | low_10d_avg | 5.542512 |
| 10 | open_5d_avg | 5.011481 |
| 11 | adj_close_5d_ago | 4.463160 |
| 12 | adj_close_7d_avg | 4.352698 |
| 13 | adj_close_1w_ago | 4.039827 |
| 14 | adj_close_3d_ago | 3.904271 |
# Keep only the 20 strongest predictors found by the baseline model.
keep_cols20 = lr_base_feature_importance['Feature'].iloc[:20].tolist()
X_train20, X_test20 = X_train[keep_cols20], X_test[keep_cols20]
# Re-standardize on the reduced feature set (statistics fit on train only).
scaler20 = StandardScaler()
X_train_scaled20 = scaler20.fit_transform(X_train20)
X_test_scaled20 = scaler20.transform(X_test20)
# Refit ordinary least squares on the 20 selected features and score it
# on the hold-out set.
lr_model20 = LinearRegression().fit(X_train_scaled20, y_train)
lr_pred20 = lr_model20.predict(X_test_scaled20)
lr_score20 = evaluate_regression_model(y_test, lr_pred20)
Mean Squared Error (MSE): 0.777 Root Mean Squared Error (RMSE): 0.882 Mean Absolute Error (MAE): 0.611 R-squared (R2): 0.981
# Record the 20-feature model's predictions alongside the baseline
prediction_df['lr_pred20'] = lr_pred20
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | |
|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 | 54.271747 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 | 54.607093 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 | 54.587532 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 | 53.925943 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 | 53.925500 |
# Show the metric dictionary for the 20-feature linear model
lr_score20
{'MSE': 0.7771456769467653,
'RMSE': 0.8815586633609617,
'MAE': 0.6109586051263984,
'R2': 0.9810034861771783}
# Re-rank importances within the reduced 20-feature model
plot_feature_importance(lr_model20,X_train20,20)
| Feature | Importance | |
|---|---|---|
| 0 | adj_close_5d_avg | 38.854308 |
| 1 | sma_5 | 27.484310 |
| 2 | adj_close_1d_ago | 23.775534 |
| 3 | close_1d_ago | 19.321375 |
| 4 | close_5d_ago | 15.098392 |
| 5 | adj_close_5d_ago | 11.704156 |
| 6 | adj_close_15d_avg | 4.158538 |
| 7 | adj_close_1w_ago | 3.070985 |
| 8 | sma_15 | 2.762572 |
| 9 | close_1w_ago | 2.757137 |
| 10 | low_5d_avg | 2.447157 |
| 11 | open_5d_avg | 1.692342 |
| 12 | low_10d_avg | 1.600124 |
| 13 | high_5d_avg | 1.304435 |
| 14 | open_10d_avg | 1.301215 |
| 15 | adj_close_3d_ago | 1.169397 |
| 16 | ema_9 | 0.882495 |
| 17 | adj_close_30d_avg | 0.311786 |
| 18 | adj_close_7d_avg | 0.285091 |
| 19 | adj_close_3d_avg | 0.276478 |
# Narrow the design matrix to the top-15 features from the baseline ranking.
keep_cols15 = lr_base_feature_importance['Feature'].iloc[:15].tolist()
X_train15, X_test15 = X_train[keep_cols15], X_test[keep_cols15]
# Standardize the reduced matrices (fit scaling statistics on train only).
scaler15 = StandardScaler()
X_train_scaled15 = scaler15.fit_transform(X_train15)
X_test_scaled15 = scaler15.transform(X_test15)
# Refit OLS on the 15-feature subset and evaluate on the hold-out set.
lr_model15 = LinearRegression().fit(X_train_scaled15, y_train)
lr_pred15 = lr_model15.predict(X_test_scaled15)
lr_score15 = evaluate_regression_model(y_test, lr_pred15)
Mean Squared Error (MSE): 0.764 Root Mean Squared Error (RMSE): 0.874 Mean Absolute Error (MAE): 0.608 R-squared (R2): 0.981
# Record the 15-feature model's predictions for side-by-side comparison
prediction_df['lr_pred15'] = lr_pred15
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | |
|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 | 54.271747 | 54.203303 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 | 54.607093 | 54.564688 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 | 54.587532 | 54.570543 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 | 53.925943 | 53.969683 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 | 53.925500 | 53.970832 |
# Show the metric dictionary for the 15-feature linear model
lr_score15
{'MSE': 0.76431207310404,
'RMSE': 0.874249434145679,
'MAE': 0.6082563126517426,
'R2': 0.9813171902098028}
# Re-rank importances within the reduced 15-feature model
plot_feature_importance(lr_model15,X_train15,15)
| Feature | Importance | |
|---|---|---|
| 0 | adj_close_5d_avg | 36.266693 |
| 1 | sma_5 | 26.803574 |
| 2 | adj_close_1d_ago | 22.806743 |
| 3 | close_1d_ago | 18.534424 |
| 4 | close_5d_ago | 14.111041 |
| 5 | adj_close_5d_ago | 10.688623 |
| 6 | adj_close_3d_ago | 1.161695 |
| 7 | low_5d_avg | 0.833075 |
| 8 | ema_9 | 0.494789 |
| 9 | adj_close_15d_avg | 0.421462 |
| 10 | adj_close_7d_avg | 0.379717 |
| 11 | low_10d_avg | 0.299484 |
| 12 | adj_close_1w_ago | 0.296130 |
| 13 | adj_close_3d_avg | 0.259609 |
| 14 | open_5d_avg | 0.252947 |
# Shrink the feature set further: top 10 from the baseline ranking.
keep_cols10 = lr_base_feature_importance['Feature'].iloc[:10].tolist()
X_train10, X_test10 = X_train[keep_cols10], X_test[keep_cols10]
# Standardize the 10-feature matrices (train-only fit, applied to test).
scaler10 = StandardScaler()
X_train_scaled10 = scaler10.fit_transform(X_train10)
X_test_scaled10 = scaler10.transform(X_test10)
# Refit OLS on the 10-feature subset and score against the hold-out set.
lr_model10 = LinearRegression().fit(X_train_scaled10, y_train)
lr_pred10 = lr_model10.predict(X_test_scaled10)
lr_score10 = evaluate_regression_model(y_test, lr_pred10)
Mean Squared Error (MSE): 0.843 Root Mean Squared Error (RMSE): 0.918 Mean Absolute Error (MAE): 0.634 R-squared (R2): 0.979
# Record the 10-feature model's predictions for comparison
prediction_df['lr_pred10'] = lr_pred10
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | |
|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 | 54.271747 | 54.203303 | 53.928125 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 | 54.607093 | 54.564688 | 54.251539 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 | 54.587532 | 54.570543 | 54.299935 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 | 53.925943 | 53.969683 | 54.039427 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 | 53.925500 | 53.970832 | 54.149424 |
# Show the metric dictionary for the 10-feature linear model
lr_score10
{'MSE': 0.8431161224362277,
'RMSE': 0.91821354947323,
'MAE': 0.6337464901517136,
'R2': 0.9793909075875864}
# Re-rank importances within the reduced 10-feature model
plot_feature_importance(lr_model10,X_train10,10)
| Feature | Importance | |
|---|---|---|
| 0 | adj_close_1d_ago | 31.773423 |
| 1 | adj_close_5d_avg | 30.972946 |
| 2 | close_1d_ago | 25.880350 |
| 3 | sma_5 | 25.688939 |
| 4 | close_5d_ago | 5.203279 |
| 5 | adj_close_3d_avg | 2.188661 |
| 6 | adj_close_15d_avg | 1.072305 |
| 7 | ema_9 | 0.829016 |
| 8 | low_5d_avg | 0.412671 |
| 9 | low_10d_avg | 0.181438 |
# Ridge regression with alpha tuned by cross-validated grid search on the
# full standardized training set.
# NOTE(review): cv=5 is ordinary K-fold; for time-ordered stock data a
# TimeSeriesSplit would avoid look-ahead leakage — confirm before relying
# on the selected alpha.
ridge_model = Ridge()
# Define the hyperparameter grid to search (log-spaced alphas)
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
# Perform GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=ridge_model, param_grid=param_grid,
                           scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)
# Get the best model (refit on the whole training set by GridSearchCV)
best_ridge_model = grid_search.best_estimator_
# Make predictions on the test set
ridge_pred_base = best_ridge_model.predict(X_test_scaled)
# Evaluate the best model on the hold-out set
mse = mean_squared_error(y_test, ridge_pred_base)
# Use np.sqrt(mse) rather than mean_squared_error(..., squared=False):
# the `squared` keyword is deprecated since scikit-learn 1.4 and removed
# in 1.6, while sqrt of the MSE is equivalent on every version.
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, ridge_pred_base)
r2 = r2_score(y_test, ridge_pred_base)
print("Best Ridge Regression Model:")
print(f"Best alpha: {best_ridge_model.alpha}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")
# Collect the metrics for the model-comparison table
ridge_score = {
'MSE': mse,
'RMSE': rmse,
'MAE': mae,
'R2': r2
}
Best Ridge Regression Model: Best alpha: 0.001 Root Mean Squared Error (RMSE): 0.872 Mean Squared Error: 0.76 Mean Absolute Error: 0.609 R2 Score: 0.981
# Rank features by importance for the tuned ridge model and keep the
# top 20 for the ridge feature-selection experiment below.
ridge_base_feature_importance = plot_feature_importance(best_ridge_model,X_train,20)
ridge_base_feature_importance[:20]
| Feature | Importance | |
|---|---|---|
| 0 | adj_close_5d_avg | 29.209670 |
| 1 | sma_5 | 23.327076 |
| 2 | ema_9 | 20.052079 |
| 3 | adj_close_1d_ago | 8.608975 |
| 4 | close_5d_ago | 7.815929 |
| 5 | adj_close_15d_avg | 6.758087 |
| 6 | close_1d_ago | 6.383786 |
| 7 | adj_close_3d_avg | 6.095190 |
| 8 | low_10d_avg | 5.185958 |
| 9 | high_5d_avg | 4.577397 |
| 10 | low_5d_avg | 4.530334 |
| 11 | adj_close_5d_ago | 4.444782 |
| 12 | open_5d_avg | 4.252683 |
| 13 | sma_10 | 4.082776 |
| 14 | adj_close_30d_avg | 3.729390 |
| 15 | macd | 3.581098 |
| 16 | sma_15 | 3.507948 |
| 17 | high_7d_avg | 3.355806 |
| 18 | open_10d_avg | 3.126070 |
| 19 | high_15d_avg | 3.066558 |
# Record the tuned ridge model's predictions for comparison
prediction_df['ridge_pred_base'] = ridge_pred_base
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | |
|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 | 54.271747 | 54.203303 | 53.928125 | 54.233949 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 | 54.607093 | 54.564688 | 54.251539 | 54.459871 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 | 54.587532 | 54.570543 | 54.299935 | 54.363214 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 | 53.925943 | 53.969683 | 54.039427 | 53.879662 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 | 53.925500 | 53.970832 | 54.149424 | 54.173588 |
# Restrict to the 20 strongest predictors from the ridge importance ranking.
keep_cols20 = ridge_base_feature_importance['Feature'].iloc[:20].tolist()
X_train20, X_test20 = X_train[keep_cols20], X_test[keep_cols20]
# Re-standardize the reduced matrices (fit on train only).
ridge_scaler20 = StandardScaler()
X_train_scaled20 = ridge_scaler20.fit_transform(X_train20)
X_test_scaled20 = ridge_scaler20.transform(X_test20)
# Refit ridge with the alpha selected by the earlier grid search and
# score it on the hold-out set.
ridge_model20 = Ridge(alpha=0.001).fit(X_train_scaled20, y_train)
ridge_pred20 = ridge_model20.predict(X_test_scaled20)
ridge_score20 = evaluate_regression_model(y_test, ridge_pred20)
Mean Squared Error (MSE): 0.771 Root Mean Squared Error (RMSE): 0.878 Mean Absolute Error (MAE): 0.609 R-squared (R2): 0.981
# Re-rank importances within the reduced 20-feature ridge model
plot_feature_importance(ridge_model20,X_train20,20)
| Feature | Importance | |
|---|---|---|
| 0 | adj_close_5d_avg | 34.865155 |
| 1 | sma_5 | 26.561371 |
| 2 | adj_close_1d_ago | 23.306249 |
| 3 | close_1d_ago | 19.054534 |
| 4 | close_5d_ago | 13.719602 |
| 5 | adj_close_5d_ago | 9.989549 |
| 6 | adj_close_15d_avg | 2.405561 |
| 7 | low_5d_avg | 2.326657 |
| 8 | sma_15 | 1.820856 |
| 9 | low_10d_avg | 1.635199 |
| 10 | open_5d_avg | 1.424137 |
| 11 | adj_close_3d_avg | 1.268768 |
| 12 | sma_10 | 1.218959 |
| 13 | high_5d_avg | 1.016692 |
| 14 | open_10d_avg | 0.924125 |
| 15 | ema_9 | 0.564495 |
| 16 | high_7d_avg | 0.189963 |
| 17 | adj_close_30d_avg | 0.186579 |
| 18 | high_15d_avg | 0.086638 |
| 19 | macd | 0.004499 |
# Record the top-20-feature Ridge predictions in the comparison table.
prediction_df = prediction_df.assign(ridge_pred20=ridge_pred20)
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | |
|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 | 54.271747 | 54.203303 | 53.928125 | 54.233949 | 54.297031 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 | 54.607093 | 54.564688 | 54.251539 | 54.459871 | 54.574923 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 | 54.587532 | 54.570543 | 54.299935 | 54.363214 | 54.606500 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 | 53.925943 | 53.969683 | 54.039427 | 53.879662 | 53.954122 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 | 53.925500 | 53.970832 | 54.149424 | 54.173588 | 54.033765 |
# Tune the Lasso regularization strength with 5-fold cross-validation.
lasso_model = Lasso()
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

# Perform GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=lasso_model, param_grid=param_grid,
                           scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)

# Get the best model
best_lasso_model = grid_search.best_estimator_

# Make predictions on the test set
lasso_pred_base = best_lasso_model.predict(X_test_scaled)

# Evaluate the best model. RMSE is derived from MSE with np.sqrt because the
# `squared=` keyword of mean_squared_error is deprecated (removed in
# scikit-learn 1.6); this keeps the value identical on all versions.
mse = mean_squared_error(y_test, lasso_pred_base)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, lasso_pred_base)
r2 = r2_score(y_test, lasso_pred_base)

print("Best Lasso Regression Model:")
print(f"Best alpha: {best_lasso_model.alpha}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")

# Keep the metrics for the final model-comparison table.
lasso_score = {
'MSE': mse,
'RMSE': rmse,
'MAE': mae,
'R2': r2
}
Best Lasso Regression Model: Best alpha: 0.001 Root Mean Squared Error (RMSE): 0.967 Mean Squared Error: 0.935 Mean Absolute Error: 0.662 R2 Score: 0.977
# Rank the base Lasso coefficients and show the 20 strongest features.
lasso_base_feature_importance = plot_feature_importance(best_lasso_model, X_train, 20)
lasso_base_feature_importance.head(20)
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 4.871884 |
| 1 | macd | 1.751569 |
| 2 | macd_signal | 1.345834 |
| 3 | close_3d_ago | 0.958941 |
| 4 | open_15d_avg | 0.639682 |
| 5 | rsi | 0.562192 |
| 6 | sma_15 | 0.495187 |
| 7 | low_1d_ago | 0.416806 |
| 8 | open_2w_ago | 0.305712 |
| 9 | adj_close_3d_avg | 0.290374 |
| 10 | sma_30 | 0.269198 |
| 11 | high_2w_ago | 0.211690 |
| 12 | low_3w_ago | 0.159687 |
| 13 | open_30d_avg | 0.154355 |
| 14 | volume_5d_avg | 0.096779 |
| 15 | close_2w_ago | 0.090966 |
| 16 | high_30d_avg | 0.082343 |
| 17 | low_30d_avg | 0.076811 |
| 18 | open_3w_ago | 0.065410 |
| 19 | open_5d_ago | 0.064327 |
# Record the base Lasso predictions in the comparison table.
prediction_df = prediction_df.assign(lasso_pred_base=lasso_pred_base)
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | |
|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 | 54.271747 | 54.203303 | 53.928125 | 54.233949 | 54.297031 | 54.481490 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 | 54.607093 | 54.564688 | 54.251539 | 54.459871 | 54.574923 | 54.207026 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 | 54.587532 | 54.570543 | 54.299935 | 54.363214 | 54.606500 | 53.919954 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 | 53.925943 | 53.969683 | 54.039427 | 53.879662 | 53.954122 | 53.833212 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 | 53.925500 | 53.970832 | 54.149424 | 54.173588 | 54.033765 | 53.915187 |
# Retrain Lasso on only the 20 features the base Lasso model ranked highest.
keep_cols20 = lasso_base_feature_importance[:20]['Feature'].tolist()
X_train20, X_test20 = X_train[keep_cols20], X_test[keep_cols20]

# Re-fit the scaler on the reduced feature set (train split only).
scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)

# Fit with the alpha found by the earlier grid search, then evaluate.
lasso_model20 = Lasso(alpha=0.001)
lasso_model20.fit(X_train_scaled20, y_train)
lasso_pred20 = lasso_model20.predict(X_test_scaled20)
lasso_score20 = evaluate_regression_model(y_test, lasso_pred20)
Mean Squared Error (MSE): 0.95 Root Mean Squared Error (RMSE): 0.975 Mean Absolute Error (MAE): 0.667 R-squared (R2): 0.977
plot_feature_importance(lasso_model20,X_train20,20)
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 4.359165 |
| 1 | macd | 1.683244 |
| 2 | open_15d_avg | 1.543022 |
| 3 | macd_signal | 1.304271 |
| 4 | close_3d_ago | 0.919101 |
| 5 | open_2w_ago | 0.588218 |
| 6 | rsi | 0.538464 |
| 7 | sma_30 | 0.471702 |
| 8 | low_1d_ago | 0.460356 |
| 9 | adj_close_3d_avg | 0.362687 |
| 10 | low_3w_ago | 0.172657 |
| 11 | high_2w_ago | 0.107719 |
| 12 | volume_5d_avg | 0.023766 |
| 13 | close_2w_ago | 0.012261 |
| 14 | open_3w_ago | 0.008425 |
| 15 | sma_15 | 0.000000 |
| 16 | open_30d_avg | 0.000000 |
| 17 | high_30d_avg | 0.000000 |
| 18 | low_30d_avg | 0.000000 |
| 19 | open_5d_ago | 0.000000 |
# Record the top-20-feature Lasso predictions in the comparison table.
prediction_df = prediction_df.assign(lasso_pred20=lasso_pred20)
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 | 54.271747 | 54.203303 | 53.928125 | 54.233949 | 54.297031 | 54.481490 | 54.540274 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 | 54.607093 | 54.564688 | 54.251539 | 54.459871 | 54.574923 | 54.207026 | 54.313239 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 | 54.587532 | 54.570543 | 54.299935 | 54.363214 | 54.606500 | 53.919954 | 53.991448 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 | 53.925943 | 53.969683 | 54.039427 | 53.879662 | 53.954122 | 53.833212 | 53.885031 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 | 53.925500 | 53.970832 | 54.149424 | 54.173588 | 54.033765 | 53.915187 | 53.943523 |
# Tune ElasticNet's strength (alpha) and L1/L2 mix (l1_ratio) with 5-fold CV.
elastic_net_model = ElasticNet()

# Define the hyperparameter grid to search
param_grid = {
    'alpha': [0.001, 0.01, 0.1, 1, 10, 100],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]
}

# Perform GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=elastic_net_model, param_grid=param_grid,
                           scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train_scaled, y_train)

# Get the best model
best_elastic_net_model = grid_search.best_estimator_

# Make predictions on the test set
elastic_pred_base = best_elastic_net_model.predict(X_test_scaled)

# Evaluate the best model. RMSE is derived from MSE with np.sqrt because the
# `squared=` keyword of mean_squared_error is deprecated (removed in
# scikit-learn 1.6); this keeps the value identical on all versions.
mse = mean_squared_error(y_test, elastic_pred_base)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, elastic_pred_base)
r2 = r2_score(y_test, elastic_pred_base)

print("Best Elastic Net Model:")
print(f"Best alpha: {best_elastic_net_model.alpha}")
print(f"Best l1_ratio: {best_elastic_net_model.l1_ratio}")
print(f'Root Mean Squared Error (RMSE): {np.round(rmse,3)}')
print(f"Mean Squared Error: {np.round(mse,3)}")
print(f"Mean Absolute Error: {np.round(mae,3)}")
print(f"R2 Score: {np.round(r2,3)}")

# Keep the metrics for the final model-comparison table.
elastic_score = {
'MSE': mse,
'RMSE': rmse,
'MAE': mae,
'R2': r2
}
Best Elastic Net Model: Best alpha: 0.001 Best l1_ratio: 0.9 Root Mean Squared Error (RMSE): 0.964 Mean Squared Error: 0.929 Mean Absolute Error: 0.66 R2 Score: 0.977
# Rank the base ElasticNet coefficients and show the 20 strongest features.
elastic_base_feature_importance = plot_feature_importance(best_elastic_net_model, X_train, 20)
elastic_base_feature_importance.head(20)
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 4.372681 |
| 1 | macd | 1.808266 |
| 2 | macd_signal | 1.391821 |
| 3 | close_3d_ago | 0.961167 |
| 4 | sma_15 | 0.856338 |
| 5 | open_15d_avg | 0.654922 |
| 6 | rsi | 0.556937 |
| 7 | low_1d_ago | 0.425022 |
| 8 | sma_30 | 0.335015 |
| 9 | open_2w_ago | 0.294117 |
| 10 | adj_close_3d_avg | 0.290065 |
| 11 | high_2w_ago | 0.224628 |
| 12 | open_30d_avg | 0.213522 |
| 13 | low_3w_ago | 0.172645 |
| 14 | volume_5d_avg | 0.103550 |
| 15 | open_5d_ago | 0.087747 |
| 16 | high_30d_avg | 0.086648 |
| 17 | low_30d_avg | 0.083314 |
| 18 | open_3w_ago | 0.076077 |
| 19 | close_2w_ago | 0.072879 |
# Record the base ElasticNet predictions in the comparison table.
prediction_df = prediction_df.assign(elastic_pred_base=elastic_pred_base)
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | elastic_pred_base | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 | 54.271747 | 54.203303 | 53.928125 | 54.233949 | 54.297031 | 54.481490 | 54.540274 | 54.475621 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 | 54.607093 | 54.564688 | 54.251539 | 54.459871 | 54.574923 | 54.207026 | 54.313239 | 54.203798 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 | 54.587532 | 54.570543 | 54.299935 | 54.363214 | 54.606500 | 53.919954 | 53.991448 | 53.919188 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 | 53.925943 | 53.969683 | 54.039427 | 53.879662 | 53.954122 | 53.833212 | 53.885031 | 53.834623 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 | 53.925500 | 53.970832 | 54.149424 | 54.173588 | 54.033765 | 53.915187 | 53.943523 | 53.922324 |
# Retrain ElasticNet on only the 20 features the base model ranked highest.
keep_cols20 = elastic_base_feature_importance[:20]['Feature'].tolist()
X_train20, X_test20 = X_train[keep_cols20], X_test[keep_cols20]

# Re-fit the scaler on the reduced feature set (train split only).
scaler = StandardScaler()
X_train_scaled20 = scaler.fit_transform(X_train20)
X_test_scaled20 = scaler.transform(X_test20)

# Fit with the hyperparameters found by the earlier grid search, then evaluate.
elastic_model20 = ElasticNet(alpha=0.001, l1_ratio=0.9)
elastic_model20.fit(X_train_scaled20, y_train)
elastic_pred20 = elastic_model20.predict(X_test_scaled20)
elastic_score20 = evaluate_regression_model(y_test, elastic_pred20)
Mean Squared Error (MSE): 0.96 Root Mean Squared Error (RMSE): 0.98 Mean Absolute Error (MAE): 0.667 R-squared (R2): 0.977
plot_feature_importance(elastic_model20,X_train20,20)
| Feature | Importance | |
|---|---|---|
| 0 | ema_9 | 3.674293 |
| 1 | macd | 1.750184 |
| 2 | sma_15 | 1.705084 |
| 3 | macd_signal | 1.361563 |
| 4 | close_3d_ago | 0.845265 |
| 5 | sma_30 | 0.726128 |
| 6 | rsi | 0.539805 |
| 7 | low_1d_ago | 0.501059 |
| 8 | open_2w_ago | 0.363514 |
| 9 | open_15d_avg | 0.326419 |
| 10 | adj_close_3d_avg | 0.309635 |
| 11 | high_2w_ago | 0.291731 |
| 12 | low_3w_ago | 0.195555 |
| 13 | volume_5d_avg | 0.026565 |
| 14 | low_30d_avg | 0.000549 |
| 15 | open_30d_avg | 0.000468 |
| 16 | open_5d_ago | 0.000000 |
| 17 | high_30d_avg | 0.000000 |
| 18 | open_3w_ago | 0.000000 |
| 19 | close_2w_ago | 0.000000 |
# Record the top-20-feature ElasticNet predictions in the comparison table.
prediction_df = prediction_df.assign(elastic_pred20=elastic_pred20)
prediction_df.head()
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | elastic_pred_base | elastic_pred20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 | 54.271747 | 54.203303 | 53.928125 | 54.233949 | 54.297031 | 54.481490 | 54.540274 | 54.475621 | 54.512223 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 | 54.607093 | 54.564688 | 54.251539 | 54.459871 | 54.574923 | 54.207026 | 54.313239 | 54.203798 | 54.293587 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 | 54.587532 | 54.570543 | 54.299935 | 54.363214 | 54.606500 | 53.919954 | 53.991448 | 53.919188 | 53.995023 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 | 53.925943 | 53.969683 | 54.039427 | 53.879662 | 53.954122 | 53.833212 | 53.885031 | 53.834623 | 53.869575 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 | 53.925500 | 53.970832 | 54.149424 | 54.173588 | 54.033765 | 53.915187 | 53.943523 | 53.922324 | 53.944519 |
def _score_row(score, model_name):
    """Turn one metrics dict into a one-row DataFrame tagged with the model name.

    Replaces the repeated keys/values-transpose idiom, which produced
    object-dtype metric columns; pd.DataFrame([score]) keeps them numeric.
    """
    row = pd.DataFrame([score])
    row['Model'] = model_name
    return row

# Collect every linear-family model's test metrics and rank them by R2
# (same concatenation order as before; the sort makes it cosmetic anyway).
df_compare = pd.concat([
    _score_row(elastic_score, 'Elastic_Net with All Features'),
    _score_row(lasso_score, 'Lasso with All Features'),
    _score_row(ridge_score, 'Ridge with All Features'),
    _score_row(elastic_score20, 'Elastic_Net with Top 20 Features'),
    _score_row(lasso_score20, 'Lasso with Top 20 Features'),
    _score_row(ridge_score20, 'Ridge with Top 20 Features'),
    _score_row(lr_score_base, 'Linear Reg. with All Features'),
    _score_row(lr_score20, 'Linear Reg. with Top 20 Features'),
    _score_row(lr_score15, 'Linear Reg. with Top 15 Features'),
    _score_row(lr_score10, 'Linear Reg. with Top 10 Features'),
]).sort_values(by=['R2'], ascending=False).reset_index(drop=True)
df_compare
| MSE | RMSE | MAE | R2 | Model | |
|---|---|---|---|---|---|
| 0 | 0.76015 | 0.871866 | 0.60886 | 0.981419 | Ridge with All Features |
| 1 | 0.764312 | 0.874249 | 0.608256 | 0.981317 | Linear Reg. with Top 15 Features |
| 2 | 0.767788 | 0.876235 | 0.612101 | 0.981232 | Linear Reg. with All Features |
| 3 | 0.771305 | 0.87824 | 0.60942 | 0.981146 | Ridge with Top 20 Features |
| 4 | 0.777146 | 0.881559 | 0.610959 | 0.981003 | Linear Reg. with Top 20 Features |
| 5 | 0.843116 | 0.918214 | 0.633746 | 0.979391 | Linear Reg. with Top 10 Features |
| 6 | 0.928855 | 0.963771 | 0.659537 | 0.977295 | Elastic_Net with All Features |
| 7 | 0.935026 | 0.966967 | 0.661521 | 0.977144 | Lasso with All Features |
| 8 | 0.949732 | 0.974542 | 0.666845 | 0.976785 | Lasso with Top 20 Features |
| 9 | 0.959534 | 0.979558 | 0.667449 | 0.976545 | Elastic_Net with Top 20 Features |
prediction_df
| date | y_test | lr_pred_base | lr_pred20 | lr_pred15 | lr_pred10 | ridge_pred_base | ridge_pred20 | lasso_pred_base | lasso_pred20 | elastic_pred_base | elastic_pred20 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1729 | 2020-01-02 | 54.240002 | 54.234807 | 54.271747 | 54.203303 | 53.928125 | 54.233949 | 54.297031 | 54.481490 | 54.540274 | 54.475621 | 54.512223 |
| 1730 | 2020-01-03 | 54.150002 | 54.462575 | 54.607093 | 54.564688 | 54.251539 | 54.459871 | 54.574923 | 54.207026 | 54.313239 | 54.203798 | 54.293587 |
| 1731 | 2020-01-06 | 53.919998 | 54.392052 | 54.587532 | 54.570543 | 54.299935 | 54.363214 | 54.606500 | 53.919954 | 53.991448 | 53.919188 | 53.995023 |
| 1732 | 2020-01-07 | 54.049999 | 53.866825 | 53.925943 | 53.969683 | 54.039427 | 53.879662 | 53.954122 | 53.833212 | 53.885031 | 53.834623 | 53.869575 |
| 1733 | 2020-01-08 | 54.189999 | 54.141524 | 53.925500 | 53.970832 | 54.149424 | 54.173588 | 54.033765 | 53.915187 | 53.943523 | 53.922324 | 53.944519 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2694 | 2023-11-01 | 67.970001 | 67.284685 | 66.932767 | 67.028746 | 67.004810 | 67.325547 | 66.883598 | 66.893549 | 66.983023 | 66.898251 | 66.962332 |
| 2695 | 2023-11-02 | 68.820000 | 67.961443 | 67.903284 | 67.963962 | 67.941089 | 67.983409 | 67.964641 | 67.421988 | 67.462660 | 67.435336 | 67.464215 |
| 2696 | 2023-11-03 | 68.239998 | 68.862446 | 69.001802 | 69.034374 | 68.480353 | 68.837447 | 68.941355 | 68.232541 | 68.282736 | 68.249280 | 68.273542 |
| 2697 | 2023-11-06 | 68.489998 | 68.073511 | 68.184944 | 68.248638 | 67.791667 | 68.100132 | 68.219073 | 68.189779 | 68.252203 | 68.190293 | 68.232252 |
| 2698 | 2023-11-07 | 69.019997 | 68.266807 | 68.945917 | 69.051137 | 68.728268 | 68.332505 | 69.018382 | 68.466769 | 68.568573 | 68.473424 | 68.545360 |
970 rows × 12 columns
def _plot_prediction_lines(pred_cols):
    """Plot y_test against the given prediction columns of prediction_df.

    Replaces five nearly identical copy-pasted plotting cells; each call
    produces one figure with actuals plus the requested model predictions.
    """
    plt.figure(figsize=(20, 10))
    sns.lineplot(x=prediction_df.date, y=prediction_df.y_test, label='y_test')
    for col in pred_cols:
        sns.lineplot(x=prediction_df.date, y=prediction_df[col], label=col)
    plt.legend(prop={'size': 14, 'weight': 'bold'})
    plt.title('Model Prediction Comparison', fontsize=16)
    plt.ylabel('Prediction', fontsize=14)
    plt.xlabel('Date', fontsize=14)
    plt.show()

# All models together, then each model family on its own chart.
_plot_prediction_lines(['lr_pred_base', 'lr_pred20', 'lr_pred15', 'lr_pred10',
                        'ridge_pred_base', 'ridge_pred20',
                        'lasso_pred_base', 'lasso_pred20',
                        'elastic_pred_base', 'elastic_pred20'])
_plot_prediction_lines(['lr_pred_base', 'lr_pred20', 'lr_pred15', 'lr_pred10'])
_plot_prediction_lines(['ridge_pred_base', 'ridge_pred20'])
_plot_prediction_lines(['lasso_pred_base', 'lasso_pred20'])
_plot_prediction_lines(['elastic_pred_base', 'elastic_pred20'])
# target column is next day's close price
y_train = train_df['close_1d_next'].copy()
# drop(cols, 1) with a positional axis was removed in pandas 2.0; use columns=.
X_train = train_df.drop(columns=['close_1d_next'])
# target column is next day's close price
y_test = test_df['close_1d_next'].copy()
X_test = test_df.drop(columns=['close_1d_next'])
def train_ridge_regression(X_train, X_test, y_train, y_test):
    """Scale the features, fit Ridge(alpha=0.001), and score it on the test set.

    Parameters:
    - X_train, X_test: feature DataFrames for the train / hold-out splits.
    - y_train, y_test: matching target Series (next day's close price).

    Returns:
    - (fitted Ridge model, test-set predictions, metrics dict from
      evaluate_regression_model2).
    """
    # Fit the scaler on training data only to avoid test-set leakage.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # Train model (the original built a second, unused Ridge instance before
    # the scaler; the dead instantiation is removed here).
    ridge_model = Ridge(alpha=0.001)
    ridge_model.fit(X_train_scaled, y_train)
    # Make predictions on the scaled test set
    ridge_pred = ridge_model.predict(X_test_scaled)
    ridge_score = evaluate_regression_model2(y_test, ridge_pred)
    return ridge_model, ridge_pred, ridge_score
# Baseline: Ridge on the full feature set via the reusable helper.
ridge_model, ridge_pred, ridge_score = train_ridge_regression(X_train,X_test,y_train,y_test)
ridge_score
{'MSE': 0.7601496984667533,
'RMSE': 0.8718656424396785,
'MAE': 0.6088603450778082,
'R2': 0.981418935107418}
ridge_pred[:15]
array([54.23394893, 54.45987053, 54.36321399, 53.87966228, 54.1735875 ,
54.09581315, 54.16195972, 54.70517195, 54.39406442, 54.76593117,
55.13141383, 55.07110404, 55.40303391, 55.39633407, 55.50978756])
# Visualize prediction accuracy (scatter) and the prediction time series.
plot_regression_accuracy(y_test, ridge_pred)
plot_predictions(df,ridge_pred)
def preprocess_data(df):
    """Add technical indicators, lagged values, and rolling means to a price frame.

    Expects lowercase OHLCV columns ('open', 'high', 'low', 'close',
    'adj close', 'volume'). The DataFrame is mutated in place and returned;
    'close_1d_next' (tomorrow's close) is the prediction target. Column
    creation order matches the original hand-written version, since Python
    dicts preserve insertion order.
    """
    # Trend indicators, shifted one day so only past information is used.
    df['ema_9'] = df['close'].ewm(9).mean().shift()
    for window in (5, 10, 15, 30):
        df[f'sma_{window}'] = df['close'].rolling(window).mean().shift()

    # Momentum / volume indicators (rsi and mfi are project helpers).
    df['rsi'] = rsi(df)
    df['mfi'] = mfi(df, 14)

    # MACD: difference of the 12- and 26-day EMAs, plus its 9-day signal line.
    ema_12 = pd.Series(df['close'].ewm(span=12, min_periods=12).mean())
    ema_26 = pd.Series(df['close'].ewm(span=26, min_periods=26).mean())
    df['macd'] = pd.Series(ema_12 - ema_26)
    df['macd_signal'] = pd.Series(df.macd.ewm(span=9, min_periods=9).mean())

    # Prediction target: next trading day's close.
    df['close_1d_next'] = df['close'].shift(-1)

    # Lagged raw prices/volume, from 1 day back up to 4 weeks back.
    lags = {'1d': 1, '3d': 3, '5d': 5, '1w': 7, '2w': 14, '3w': 21, '4w': 28}
    lag_sources = {'close': 'close', 'adj close': 'adj_close', 'open': 'open',
                   'high': 'high', 'low': 'low', 'volume': 'volume'}
    for src, prefix in lag_sources.items():
        for label, periods in lags.items():
            df[f'{prefix}_{label}_ago'] = df[src].shift(periods)

    # Rolling means over several horizons. NOTE: these are not shifted, so
    # they include the current day's value — kept identical to the original
    # behaviour rather than "fixed".
    avg_sources = {'open': 'open', 'high': 'high', 'low': 'low',
                   'volume': 'volume', 'adj close': 'adj_close'}
    for src, prefix in avg_sources.items():
        for window in (3, 5, 7, 10, 15, 30):
            df[f'{prefix}_{window}d_avg'] = df[src].rolling(window=window).mean()
    return df
# Load the daily bar data and normalize column names.
df_all = pd.read_parquet(out_loc + "stock_1d.parquet")
df_all.columns = df_all.columns.str.lower()

### keep stocks in data with min year 2013, max year 2023
grouped_dates = df_all.groupby('symbol')['date']
stock_min_dt = grouped_dates.min().reset_index().rename(columns={'date': 'min_date'})
stock_max_dt = grouped_dates.max().reset_index().rename(columns={'date': 'max_date'})
stock_cnt_dt = grouped_dates.count().reset_index().rename(columns={'date': 'days_cnt'})
stock_cnt = stock_min_dt.merge(stock_max_dt, on='symbol').merge(stock_cnt_dt, on='symbol')
stock_cnt['min_year'] = stock_cnt['min_date'].dt.year
stock_cnt['max_year'] = stock_cnt['max_date'].dt.year

# Symbols listed for the whole 2013-2023 span with enough trading history.
full_history = (
    (stock_cnt['min_year'] == 2013)
    & (stock_cnt['max_year'] == 2023)
    & (stock_cnt['days_cnt'] >= 2500)
)
keep_stocks = stock_cnt[full_history]['symbol'].unique().tolist()
stock_cnt.head()
| symbol | min_date | max_date | days_cnt | min_year | max_year | |
|---|---|---|---|---|---|---|
| 0 | A | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 1 | AAL | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 2 | AAPL | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 3 | ABBV | 2013-01-02 | 2023-11-08 | 2733 | 2013 | 2023 |
| 4 | ABNB | 2020-12-10 | 2023-11-08 | 733 | 2020 | 2023 |
# Restrict to 2023 rows for the retained symbols.
df_2023 = df_all[(df_all.date.dt.year == 2023) & (df_all.symbol.isin(keep_stocks))]

# volume vs stocks: total 2023 volume per symbol, highest first.
volume_2023 = (
    df_2023.groupby(['symbol', 'security', 'gics sector'])['volume']
    .sum()
    .reset_index()
    .sort_values(by='volume', ascending=False)
    .reset_index(drop=True)
)
volume_2023.head()
| symbol | security | gics sector | volume | |
|---|---|---|---|---|
| 0 | TSLA | Tesla, Inc. | Consumer Discretionary | 3.009291e+10 |
| 1 | AMD | AMD | Information Technology | 1.342035e+10 |
| 2 | AMZN | Amazon | Consumer Discretionary | 1.305160e+10 |
| 3 | AAPL | Apple Inc. | Information Technology | 1.303964e+10 |
| 4 | F | Ford Motor Company | Consumer Discretionary | 1.278319e+10 |
# volume vs sectors: total 2023 volume per GICS sector, highest first.
sector_2023 = (
    df_2023.groupby(['gics sector'])['volume']
    .sum()
    .reset_index()
    .sort_values(by='volume', ascending=False)
    .reset_index(drop=True)
)
sector_2023
| gics sector | volume | |
|---|---|---|
| 0 | Consumer Discretionary | 9.171407e+10 |
| 1 | Information Technology | 8.888840e+10 |
| 2 | Financials | 6.728113e+10 |
| 3 | Communication Services | 5.267892e+10 |
| 4 | Health Care | 3.755560e+10 |
| 5 | Industrials | 3.672492e+10 |
| 6 | Energy | 3.245171e+10 |
| 7 | Consumer Staples | 2.824873e+10 |
| 8 | Utilities | 2.214882e+10 |
| 9 | Materials | 1.432867e+10 |
| 10 | Real Estate | 1.318748e+10 |
# filter top 5 sectors with highest volume in 2023
sector_list = sector_2023[:5]['gics sector'].tolist()
num_stocks = 5

# stocks with highest volume in each sector, flattened into one list
stock_list = [
    sym
    for sec in sector_list
    for sym in volume_2023[volume_2023['gics sector'] == sec]['symbol'][:num_stocks].tolist()
]
len(stock_list)
25
# Keep only the selected high-volume symbols for the per-stock experiment.
df_stocks = df_all.loc[df_all['symbol'].isin(stock_list)].reset_index(drop=True)
df_stocks.head()
| date | open | high | low | close | adj close | volume | symbol | security | gics sector | gics sub-industry | headquarters location | date added | cik | founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2013-01-02 | 18.003504 | 18.193193 | 17.931683 | 18.099348 | 18.099348 | 101550348.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 1 | 2013-01-03 | 18.141392 | 18.316566 | 18.036036 | 18.109859 | 18.109859 | 92635272.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 2 | 2013-01-04 | 18.251753 | 18.555305 | 18.210211 | 18.467718 | 18.467718 | 110429460.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 3 | 2013-01-07 | 18.404655 | 18.503002 | 18.282784 | 18.387136 | 18.387136 | 66161772.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
| 4 | 2013-01-08 | 18.406906 | 18.425926 | 18.128880 | 18.350851 | 18.350851 | 66976956.0 | GOOGL | Alphabet Inc. (Class A) | Communication Services | Interactive Media & Services | Mountain View, California | 2014-04-03 | 1652044 | 1998 |
# Train/evaluate one Ridge model per selected stock and collect the metrics.
stock_compare = []
# Raw/metadata columns to discard after feature engineering (loop-invariant).
drop_cols1 = ['date', 'open', 'high', 'low', 'close', 'adj close', 'volume',
              'symbol', 'security', 'gics sector', 'gics sub-industry',
              'headquarters location', 'date added', 'cik', 'founded']
for stock in stock_list:
    # .copy() so preprocess_data mutates an independent frame rather than a
    # slice of df_stocks (avoids SettingWithCopy behaviour).
    stock_data = df_stocks[df_stocks['symbol'] == stock].copy()
    stock_data = preprocess_data(stock_data)
    # dropna removes the warm-up rows produced by the rolling/lag features.
    stock_data = stock_data.dropna().reset_index(drop=True)

    # Split the DataFrame into training and testing sets:
    # train on pre-2020 data, evaluate on 2020 onwards.
    train_df_temp = stock_data[stock_data.date.dt.year < 2020]
    test_df_temp = stock_data[stock_data.date.dt.year >= 2020]

    # drop(cols, 1) with a positional axis was removed in pandas 2.0; use columns=.
    train_df_temp = train_df_temp.drop(columns=drop_cols1)
    test_df_temp = test_df_temp.drop(columns=drop_cols1)

    # target column is next day's close price
    y_train_temp = train_df_temp['close_1d_next'].copy()
    X_train_temp = train_df_temp.drop(columns=['close_1d_next'])
    y_test_temp = test_df_temp['close_1d_next'].copy()
    X_test_temp = test_df_temp.drop(columns=['close_1d_next'])

    temp_model, temp_pred, temp_score = train_ridge_regression(
        X_train_temp, X_test_temp, y_train_temp, y_test_temp)

    # One metrics row per symbol (numeric dtypes, unlike the old
    # keys/values-transpose construction).
    score_df = pd.DataFrame([temp_score])
    score_df['symbol'] = stock
    stock_compare.append(score_df)

compare_df = pd.concat(stock_compare).sort_values(by='R2', ascending=False).reset_index(drop=True)
compare_df
| MSE | RMSE | MAE | R2 | symbol | |
|---|---|---|---|---|---|
| 0 | 56.919689 | 7.544514 | 5.112978 | 0.995061 | NVDA |
| 1 | 0.559182 | 0.747785 | 0.529467 | 0.993251 | VZ |
| 2 | 7.770865 | 2.787627 | 2.107652 | 0.992581 | AAPL |
| 3 | 5.039335 | 2.244846 | 1.670374 | 0.992108 | GOOG |
| 4 | 4.98575 | 2.232879 | 1.654225 | 0.992002 | GOOGL |
| 5 | 46.381058 | 6.810364 | 4.690443 | 0.990596 | META |
| 6 | 2.057386 | 1.434359 | 1.020137 | 0.990386 | CVS |
| 7 | 0.147953 | 0.384647 | 0.271314 | 0.990362 | F |
| 8 | 1.27562 | 1.129434 | 0.839937 | 0.98989 | GM |
| 9 | 28.055837 | 5.296776 | 3.992424 | 0.989314 | MSFT |
| 10 | 84.835254 | 9.210606 | 6.497099 | 0.988764 | TSLA |
| 11 | 0.575533 | 0.758639 | 0.55086 | 0.988709 | PFE |
| 12 | 0.581909 | 0.76283 | 0.577919 | 0.988411 | BAC |
| 13 | 0.26222 | 0.512075 | 0.377068 | 0.988041 | KEY |
| 14 | 1.80572 | 1.343771 | 0.958788 | 0.987257 | INTC |
| 15 | 0.991949 | 0.995966 | 0.734726 | 0.987031 | WFC |
| 16 | 11.147927 | 3.338851 | 2.469883 | 0.985621 | AMZN |
| 17 | 0.174755 | 0.418037 | 0.281833 | 0.983153 | T |
| 18 | 10.145635 | 3.185221 | 2.328928 | 0.982675 | AMD |
| 19 | 2.409729 | 1.55233 | 1.141577 | 0.981822 | C |
| 20 | 0.124353 | 0.352637 | 0.259468 | 0.979688 | HBAN |
| 21 | 0.870412 | 0.932958 | 0.684029 | 0.978487 | BMY |
| 22 | 2.087087 | 1.444675 | 1.154954 | 0.96863 | CCL |
| 23 | 4.398395 | 2.097235 | 1.52279 | 0.967914 | JNJ |
| 24 | 4.070663 | 2.017589 | 1.528408 | 0.589411 | VTRS |